Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/ARM/ARM.h121
-rw-r--r--lib/Target/ARM/ARM.td136
-rw-r--r--lib/Target/ARM/ARMAddressingModes.h394
-rw-r--r--lib/Target/ARM/ARMBuildAttrs.h64
-rw-r--r--lib/Target/ARM/ARMCallingConv.td87
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp1411
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp1285
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp100
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h92
-rw-r--r--lib/Target/ARM/ARMFrameInfo.h32
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp911
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp2346
-rw-r--r--lib/Target/ARM/ARMISelLowering.h184
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td868
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp1025
-rw-r--r--lib/Target/ARM/ARMInstrInfo.h258
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td1390
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td562
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td12
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td398
-rw-r--r--lib/Target/ARM/ARMJITInfo.cpp298
-rw-r--r--lib/Target/ARM/ARMJITInfo.h178
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp778
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h238
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.cpp1528
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.h102
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td221
-rw-r--r--lib/Target/ARM/ARMRelocations.h56
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp84
-rw-r--r--lib/Target/ARM/ARMSubtarget.h122
-rw-r--r--lib/Target/ARM/ARMTargetAsmInfo.cpp291
-rw-r--r--lib/Target/ARM/ARMTargetAsmInfo.h64
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp242
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h104
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp1117
-rw-r--r--lib/Target/ARM/AsmPrinter/CMakeLists.txt9
-rw-r--r--lib/Target/ARM/AsmPrinter/Makefile15
-rw-r--r--lib/Target/ARM/CMakeLists.txt27
-rw-r--r--lib/Target/ARM/Makefile23
-rw-r--r--lib/Target/ARM/README-Thumb.txt228
-rw-r--r--lib/Target/ARM/README.txt554
-rw-r--r--lib/Target/Alpha/Alpha.h51
-rw-r--r--lib/Target/Alpha/Alpha.td66
-rw-r--r--lib/Target/Alpha/AlphaBranchSelector.cpp67
-rw-r--r--lib/Target/Alpha/AlphaCodeEmitter.cpp242
-rw-r--r--lib/Target/Alpha/AlphaISelDAGToDAG.cpp553
-rw-r--r--lib/Target/Alpha/AlphaISelLowering.cpp798
-rw-r--r--lib/Target/Alpha/AlphaISelLowering.h114
-rw-r--r--lib/Target/Alpha/AlphaInstrFormats.td268
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.cpp450
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.h97
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.td1137
-rw-r--r--lib/Target/Alpha/AlphaJITInfo.cpp307
-rw-r--r--lib/Target/Alpha/AlphaJITInfo.h47
-rw-r--r--lib/Target/Alpha/AlphaLLRP.cpp158
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.cpp335
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.h67
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.td171
-rw-r--r--lib/Target/Alpha/AlphaRelocations.h31
-rw-r--r--lib/Target/Alpha/AlphaSchedule.td84
-rw-r--r--lib/Target/Alpha/AlphaSubtarget.cpp25
-rw-r--r--lib/Target/Alpha/AlphaSubtarget.h47
-rw-r--r--lib/Target/Alpha/AlphaTargetAsmInfo.cpp31
-rw-r--r--lib/Target/Alpha/AlphaTargetAsmInfo.h32
-rw-r--r--lib/Target/Alpha/AlphaTargetMachine.cpp126
-rw-r--r--lib/Target/Alpha/AlphaTargetMachine.h82
-rw-r--r--lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp305
-rw-r--r--lib/Target/Alpha/AsmPrinter/CMakeLists.txt9
-rw-r--r--lib/Target/Alpha/AsmPrinter/Makefile15
-rw-r--r--lib/Target/Alpha/CMakeLists.txt25
-rw-r--r--lib/Target/Alpha/Makefile22
-rw-r--r--lib/Target/Alpha/README.txt42
-rw-r--r--lib/Target/CBackend/CBackend.cpp3601
-rw-r--r--lib/Target/CBackend/CMakeLists.txt3
-rw-r--r--lib/Target/CBackend/CTargetMachine.h43
-rw-r--r--lib/Target/CBackend/Makefile14
-rw-r--r--lib/Target/CMakeLists.txt17
-rw-r--r--lib/Target/CellSPU/AsmPrinter/CMakeLists.txt12
-rw-r--r--lib/Target/CellSPU/AsmPrinter/Makefile17
-rw-r--r--lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp623
-rw-r--r--lib/Target/CellSPU/CMakeLists.txt24
-rw-r--r--lib/Target/CellSPU/CellSDKIntrinsics.td448
-rw-r--r--lib/Target/CellSPU/Makefile22
-rw-r--r--lib/Target/CellSPU/README.txt90
-rw-r--r--lib/Target/CellSPU/SPU.h102
-rw-r--r--lib/Target/CellSPU/SPU.td66
-rw-r--r--lib/Target/CellSPU/SPU128InstrInfo.td41
-rw-r--r--lib/Target/CellSPU/SPU64InstrInfo.td394
-rw-r--r--lib/Target/CellSPU/SPUCallingConv.td115
-rw-r--r--lib/Target/CellSPU/SPUFrameInfo.cpp29
-rw-r--r--lib/Target/CellSPU/SPUFrameInfo.h79
-rw-r--r--lib/Target/CellSPU/SPUHazardRecognizers.cpp138
-rw-r--r--lib/Target/CellSPU/SPUHazardRecognizers.h41
-rw-r--r--lib/Target/CellSPU/SPUISelDAGToDAG.cpp1244
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp2980
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.h154
-rw-r--r--lib/Target/CellSPU/SPUInstrBuilder.h43
-rw-r--r--lib/Target/CellSPU/SPUInstrFormats.td298
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.cpp693
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.h114
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.td4614
-rw-r--r--lib/Target/CellSPU/SPUMachineFunction.h43
-rw-r--r--lib/Target/CellSPU/SPUMathInstr.td97
-rw-r--r--lib/Target/CellSPU/SPUNodes.td156
-rw-r--r--lib/Target/CellSPU/SPUOperands.td655
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.cpp614
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.h101
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.td429
-rw-r--r--lib/Target/CellSPU/SPURegisterNames.h18
-rw-r--r--lib/Target/CellSPU/SPUSchedule.td57
-rw-r--r--lib/Target/CellSPU/SPUSubtarget.cpp40
-rw-r--r--lib/Target/CellSPU/SPUSubtarget.h95
-rw-r--r--lib/Target/CellSPU/SPUTargetAsmInfo.cpp74
-rw-r--r--lib/Target/CellSPU/SPUTargetAsmInfo.h51
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.cpp98
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.h95
-rw-r--r--lib/Target/CppBackend/CMakeLists.txt3
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp2007
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h44
-rw-r--r--lib/Target/CppBackend/Makefile14
-rw-r--r--lib/Target/DarwinTargetAsmInfo.cpp169
-rw-r--r--lib/Target/ELFTargetAsmInfo.cpp227
-rw-r--r--lib/Target/IA64/AsmPrinter/CMakeLists.txt12
-rw-r--r--lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp376
-rw-r--r--lib/Target/IA64/AsmPrinter/Makefile17
-rw-r--r--lib/Target/IA64/CMakeLists.txt20
-rw-r--r--lib/Target/IA64/IA64.h58
-rw-r--r--lib/Target/IA64/IA64.td39
-rw-r--r--lib/Target/IA64/IA64Bundling.cpp118
-rw-r--r--lib/Target/IA64/IA64ISelDAGToDAG.cpp575
-rw-r--r--lib/Target/IA64/IA64ISelLowering.cpp622
-rw-r--r--lib/Target/IA64/IA64ISelLowering.h76
-rw-r--r--lib/Target/IA64/IA64InstrBuilder.h40
-rw-r--r--lib/Target/IA64/IA64InstrFormats.td80
-rw-r--r--lib/Target/IA64/IA64InstrInfo.cpp193
-rw-r--r--lib/Target/IA64/IA64InstrInfo.h70
-rw-r--r--lib/Target/IA64/IA64InstrInfo.td751
-rw-r--r--lib/Target/IA64/IA64MachineFunctionInfo.h34
-rw-r--r--lib/Target/IA64/IA64RegisterInfo.cpp319
-rw-r--r--lib/Target/IA64/IA64RegisterInfo.h63
-rw-r--r--lib/Target/IA64/IA64RegisterInfo.td509
-rw-r--r--lib/Target/IA64/IA64Subtarget.cpp18
-rw-r--r--lib/Target/IA64/IA64Subtarget.h28
-rw-r--r--lib/Target/IA64/IA64TargetAsmInfo.cpp44
-rw-r--r--lib/Target/IA64/IA64TargetAsmInfo.h33
-rw-r--r--lib/Target/IA64/IA64TargetMachine.cpp94
-rw-r--r--lib/Target/IA64/IA64TargetMachine.h64
-rw-r--r--lib/Target/IA64/Makefile20
-rw-r--r--lib/Target/IA64/README48
-rw-r--r--lib/Target/MSIL/CMakeLists.txt3
-rw-r--r--lib/Target/MSIL/MSILWriter.cpp1680
-rw-r--r--lib/Target/MSIL/MSILWriter.h255
-rw-r--r--lib/Target/MSIL/Makefile14
-rw-r--r--lib/Target/MSIL/README.TXT26
-rw-r--r--lib/Target/MSP430/CMakeLists.txt23
-rw-r--r--lib/Target/MSP430/MSP430.h40
-rw-r--r--lib/Target/MSP430/MSP430.td60
-rw-r--r--lib/Target/MSP430/MSP430AsmPrinter.cpp267
-rw-r--r--lib/Target/MSP430/MSP430CallingConv.td37
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp194
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp670
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h103
-rw-r--r--lib/Target/MSP430/MSP430InstrFormats.td67
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp177
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.h84
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.td901
-rw-r--r--lib/Target/MSP430/MSP430MachineFunctionInfo.h39
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp355
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.h70
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.td122
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.cpp27
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.h41
-rw-r--r--lib/Target/MSP430/MSP430TargetAsmInfo.cpp22
-rw-r--r--lib/Target/MSP430/MSP430TargetAsmInfo.h31
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp76
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.h68
-rw-r--r--lib/Target/MSP430/Makefile21
-rw-r--r--lib/Target/MSP430/README.txt42
-rw-r--r--lib/Target/Makefile20
-rw-r--r--lib/Target/Mips/AsmPrinter/CMakeLists.txt12
-rw-r--r--lib/Target/Mips/AsmPrinter/Makefile17
-rw-r--r--lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp580
-rw-r--r--lib/Target/Mips/CMakeLists.txt22
-rw-r--r--lib/Target/Mips/Makefile23
-rw-r--r--lib/Target/Mips/Mips.h41
-rw-r--r--lib/Target/Mips/Mips.td88
-rw-r--r--lib/Target/Mips/MipsCallingConv.td86
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp77
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp392
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp1254
-rw-r--r--lib/Target/Mips/MipsISelLowering.h130
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td304
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td182
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp623
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h223
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td707
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h131
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp535
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h78
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td252
-rw-r--r--lib/Target/Mips/MipsSchedule.td63
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp77
-rw-r--r--lib/Target/Mips/MipsSubtarget.h139
-rw-r--r--lib/Target/Mips/MipsTargetAsmInfo.cpp98
-rw-r--r--lib/Target/Mips/MipsTargetAsmInfo.h51
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp133
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h80
-rw-r--r--lib/Target/PIC16/CMakeLists.txt24
-rw-r--r--lib/Target/PIC16/Makefile21
-rw-r--r--lib/Target/PIC16/PIC16.h345
-rw-r--r--lib/Target/PIC16/PIC16.td40
-rw-r--r--lib/Target/PIC16/PIC16AsmPrinter.cpp404
-rw-r--r--lib/Target/PIC16/PIC16AsmPrinter.h70
-rw-r--r--lib/Target/PIC16/PIC16DebugInfo.cpp270
-rw-r--r--lib/Target/PIC16/PIC16DebugInfo.h114
-rw-r--r--lib/Target/PIC16/PIC16ISelDAGToDAG.cpp59
-rw-r--r--lib/Target/PIC16/PIC16ISelDAGToDAG.h60
-rw-r--r--lib/Target/PIC16/PIC16ISelLowering.cpp1756
-rw-r--r--lib/Target/PIC16/PIC16ISelLowering.h227
-rw-r--r--lib/Target/PIC16/PIC16InstrFormats.td117
-rw-r--r--lib/Target/PIC16/PIC16InstrInfo.cpp186
-rw-r--r--lib/Target/PIC16/PIC16InstrInfo.h70
-rw-r--r--lib/Target/PIC16/PIC16InstrInfo.td522
-rw-r--r--lib/Target/PIC16/PIC16MemSelOpt.cpp169
-rw-r--r--lib/Target/PIC16/PIC16RegisterInfo.cpp91
-rw-r--r--lib/Target/PIC16/PIC16RegisterInfo.h68
-rw-r--r--lib/Target/PIC16/PIC16RegisterInfo.td33
-rw-r--r--lib/Target/PIC16/PIC16Subtarget.cpp27
-rw-r--r--lib/Target/PIC16/PIC16Subtarget.h45
-rw-r--r--lib/Target/PIC16/PIC16TargetAsmInfo.cpp264
-rw-r--r--lib/Target/PIC16/PIC16TargetAsmInfo.h79
-rw-r--r--lib/Target/PIC16/PIC16TargetMachine.cpp79
-rw-r--r--lib/Target/PIC16/PIC16TargetMachine.h76
-rw-r--r--lib/Target/PowerPC/AsmPrinter/CMakeLists.txt9
-rw-r--r--lib/Target/PowerPC/AsmPrinter/Makefile15
-rw-r--r--lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp1204
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt28
-rw-r--r--lib/Target/PowerPC/Makefile22
-rw-r--r--lib/Target/PowerPC/PPC.h49
-rw-r--r--lib/Target/PowerPC/PPC.td114
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp174
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td66
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp266
-rw-r--r--lib/Target/PowerPC/PPCFrameInfo.h93
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp304
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h73
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp1170
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp4878
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h394
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td723
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td668
-rw-r--r--lib/Target/PowerPC/PPCInstrBuilder.h43
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td875
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp818
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h168
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td1475
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp437
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.h48
-rw-r--r--lib/Target/PowerPC/PPCMachOWriterInfo.cpp151
-rw-r--r--lib/Target/PowerPC/PPCMachOWriterInfo.h55
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h104
-rw-r--r--lib/Target/PowerPC/PPCPerfectShuffle.h6586
-rw-r--r--lib/Target/PowerPC/PPCPredicates.cpp30
-rw-r--r--lib/Target/PowerPC/PPCPredicates.h39
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp1446
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h95
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td360
-rw-r--r--lib/Target/PowerPC/PPCRelocations.h56
-rw-r--r--lib/Target/PowerPC/PPCSchedule.td508
-rw-r--r--lib/Target/PowerPC/PPCScheduleG3.td63
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4.td73
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4Plus.td76
-rw-r--r--lib/Target/PowerPC/PPCScheduleG5.td83
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp152
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h160
-rw-r--r--lib/Target/PowerPC/PPCTargetAsmInfo.cpp161
-rw-r--r--lib/Target/PowerPC/PPCTargetAsmInfo.h62
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp250
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h120
-rw-r--r--lib/Target/PowerPC/README.txt799
-rw-r--r--lib/Target/PowerPC/README_ALTIVEC.txt211
-rw-r--r--lib/Target/README.txt1679
-rw-r--r--lib/Target/Sparc/AsmPrinter/CMakeLists.txt9
-rw-r--r--lib/Target/Sparc/AsmPrinter/Makefile15
-rw-r--r--lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp355
-rw-r--r--lib/Target/Sparc/CMakeLists.txt23
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp76
-rw-r--r--lib/Target/Sparc/FPMover.cpp139
-rw-r--r--lib/Target/Sparc/Makefile22
-rw-r--r--lib/Target/Sparc/README.txt58
-rw-r--r--lib/Target/Sparc/Sparc.h119
-rw-r--r--lib/Target/Sparc/Sparc.td76
-rw-r--r--lib/Target/Sparc/SparcCallingConv.td32
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp215
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp1049
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h79
-rw-r--r--lib/Target/Sparc/SparcInstrFormats.td114
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp277
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.h114
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td769
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp196
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.h67
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.td158
-rw-r--r--lib/Target/Sparc/SparcSubtarget.cpp43
-rw-r--r--lib/Target/Sparc/SparcSubtarget.h43
-rw-r--r--lib/Target/Sparc/SparcTargetAsmInfo.cpp50
-rw-r--r--lib/Target/Sparc/SparcTargetAsmInfo.h33
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp94
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h63
-rw-r--r--lib/Target/SubtargetFeature.cpp364
-rw-r--r--lib/Target/Target.cpp94
-rw-r--r--lib/Target/TargetAsmInfo.cpp461
-rw-r--r--lib/Target/TargetData.cpp603
-rw-r--r--lib/Target/TargetFrameInfo.cpp19
-rw-r--r--lib/Target/TargetInstrInfo.cpp50
-rw-r--r--lib/Target/TargetIntrinsicInfo.cpp22
-rw-r--r--lib/Target/TargetMachOWriterInfo.cpp25
-rw-r--r--lib/Target/TargetMachine.cpp229
-rw-r--r--lib/Target/TargetMachineRegistry.cpp78
-rw-r--r--lib/Target/TargetRegisterInfo.cpp144
-rw-r--r--lib/Target/TargetSubtarget.cpp22
-rw-r--r--lib/Target/X86/AsmPrinter/CMakeLists.txt11
-rw-r--r--lib/Target/X86/AsmPrinter/Makefile15
-rw-r--r--lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp1075
-rw-r--r--lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h164
-rw-r--r--lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp50
-rw-r--r--lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp609
-rw-r--r--lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h152
-rw-r--r--lib/Target/X86/CMakeLists.txt29
-rw-r--r--lib/Target/X86/Makefile23
-rw-r--r--lib/Target/X86/README-FPStack.txt85
-rw-r--r--lib/Target/X86/README-MMX.txt71
-rw-r--r--lib/Target/X86/README-SSE.txt918
-rw-r--r--lib/Target/X86/README-UNIMPLEMENTED.txt14
-rw-r--r--lib/Target/X86/README-X86-64.txt251
-rw-r--r--lib/Target/X86/README.txt1899
-rw-r--r--lib/Target/X86/X86.h84
-rw-r--r--lib/Target/X86/X86.td184
-rw-r--r--lib/Target/X86/X86COFF.h95
-rw-r--r--lib/Target/X86/X86CallingConv.td360
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp811
-rw-r--r--lib/Target/X86/X86CompilationCallback_Win64.asm67
-rw-r--r--lib/Target/X86/X86ELFWriterInfo.cpp18
-rw-r--r--lib/Target/X86/X86ELFWriterInfo.h29
-rw-r--r--lib/Target/X86/X86FastISel.cpp1549
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp1187
-rw-r--r--lib/Target/X86/X86FloatingPointRegKill.cpp139
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp1716
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp8794
-rw-r--r--lib/Target/X86/X86ISelLowering.h705
-rw-r--r--lib/Target/X86/X86Instr64bit.td1937
-rw-r--r--lib/Target/X86/X86InstrBuilder.h168
-rw-r--r--lib/Target/X86/X86InstrFPStack.td597
-rw-r--r--lib/Target/X86/X86InstrFormats.td285
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp3227
-rw-r--r--lib/Target/X86/X86InstrInfo.h461
-rw-r--r--lib/Target/X86/X86InstrInfo.td3961
-rw-r--r--lib/Target/X86/X86InstrMMX.td694
-rw-r--r--lib/Target/X86/X86InstrSSE.td3643
-rw-r--r--lib/Target/X86/X86JITInfo.cpp560
-rw-r--r--lib/Target/X86/X86JITInfo.h84
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h112
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp1280
-rw-r--r--lib/Target/X86/X86RegisterInfo.h163
-rw-r--r--lib/Target/X86/X86RegisterInfo.td762
-rw-r--r--lib/Target/X86/X86Relocations.h42
-rw-r--r--lib/Target/X86/X86Subtarget.cpp446
-rw-r--r--lib/Target/X86/X86Subtarget.h224
-rw-r--r--lib/Target/X86/X86TargetAsmInfo.cpp461
-rw-r--r--lib/Target/X86/X86TargetAsmInfo.h75
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp317
-rw-r--r--lib/Target/X86/X86TargetMachine.h124
-rw-r--r--lib/Target/XCore/CMakeLists.txt23
-rw-r--r--lib/Target/XCore/Makefile21
-rw-r--r--lib/Target/XCore/README.txt8
-rw-r--r--lib/Target/XCore/XCore.h42
-rw-r--r--lib/Target/XCore/XCore.td62
-rw-r--r--lib/Target/XCore/XCoreAsmPrinter.cpp472
-rw-r--r--lib/Target/XCore/XCoreCallingConv.td33
-rw-r--r--lib/Target/XCore/XCoreFrameInfo.cpp27
-rw-r--r--lib/Target/XCore/XCoreFrameInfo.h34
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp230
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp934
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h123
-rw-r--r--lib/Target/XCore/XCoreInstrFormats.td120
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.cpp524
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.h110
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td991
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.h69
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp598
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.h94
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.td91
-rw-r--r--lib/Target/XCore/XCoreSubtarget.cpp28
-rw-r--r--lib/Target/XCore/XCoreSubtarget.h46
-rw-r--r--lib/Target/XCore/XCoreTargetAsmInfo.cpp201
-rw-r--r--lib/Target/XCore/XCoreTargetAsmInfo.h45
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp71
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h63
398 files changed, 145994 insertions, 0 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
new file mode 100644
index 000000000000..ac7de911ceec
--- /dev/null
+++ b/lib/Target/ARM/ARM.h
@@ -0,0 +1,121 @@
+//===-- ARM.h - Top-level interface for ARM representation ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// ARM back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_H
+#define TARGET_ARM_H
+
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+
+class ARMTargetMachine;
+class FunctionPass;
+class MachineCodeEmitter;
+class JITCodeEmitter;
+class raw_ostream;
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+ // The CondCodes constants map directly to the 4-bit encoding of the
+ // condition field for predicated instructions.
+ enum CondCodes {
+ EQ,
+ NE,
+ HS,
+ LO,
+ MI,
+ PL,
+ VS,
+ VC,
+ HI,
+ LS,
+ GE,
+ LT,
+ GT,
+ LE,
+ AL
+ };
+
+ inline static CondCodes getOppositeCondition(CondCodes CC){
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case HS: return LO;
+ case LO: return HS;
+ case MI: return PL;
+ case PL: return MI;
+ case VS: return VC;
+ case VC: return VS;
+ case HI: return LS;
+ case LS: return HI;
+ case GE: return LT;
+ case LT: return GE;
+ case GT: return LE;
+ case LE: return GT;
+ }
+ }
+}
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case ARMCC::EQ: return "eq";
+ case ARMCC::NE: return "ne";
+ case ARMCC::HS: return "hs";
+ case ARMCC::LO: return "lo";
+ case ARMCC::MI: return "mi";
+ case ARMCC::PL: return "pl";
+ case ARMCC::VS: return "vs";
+ case ARMCC::VC: return "vc";
+ case ARMCC::HI: return "hi";
+ case ARMCC::LS: return "ls";
+ case ARMCC::GE: return "ge";
+ case ARMCC::LT: return "lt";
+ case ARMCC::GT: return "gt";
+ case ARMCC::LE: return "le";
+ case ARMCC::AL: return "al";
+ }
+}
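+
+// Illustrative usage of the helpers above (values follow the tables):
+//   ARMCC::getOppositeCondition(ARMCC::GE) == ARMCC::LT
+//   ARMCondCodeToString(ARMCC::LT)         == "lt"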
+
+FunctionPass *createARMISelDag(ARMTargetMachine &TM);
+FunctionPass *createARMCodePrinterPass(raw_ostream &O,
+ ARMTargetMachine &TM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+                                       MachineCodeEmitter &MCE);
+FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM,
+                                          JITCodeEmitter &JCE);
+
+FunctionPass *createARMLoadStoreOptimizationPass();
+FunctionPass *createARMConstantIslandPass();
+
+} // end namespace llvm;
+
+// Defines symbolic names for ARM registers. This defines a mapping from
+// register name to register number.
+//
+#include "ARMGenRegisterNames.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#include "ARMGenInstrNames.inc"
+
+
+#endif
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
new file mode 100644
index 000000000000..4ac6857d22d4
--- /dev/null
+++ b/lib/Target/ARM/ARM.td
@@ -0,0 +1,136 @@
+//===- ARM.td - Describe the ARM Target Machine -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget features.
+//
+
+def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
+ "ARM v4T">;
+def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
+ "ARM v5T">;
+def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
+ "ARM v5TE, v5TEj, v5TExp">;
+def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
+ "ARM v6">;
+def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
+ "ARM v7A">;
+def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
+ "Enable VFP2 instructions">;
+def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
+ "Enable VFP3 instructions">;
+def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON",
+ "Enable NEON instructions">;
+def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
+ "Enable Thumb2 instructions">;
+
+//===----------------------------------------------------------------------===//
+// ARM Processors supported.
+//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+// V4 Processors.
+def : Proc<"generic", []>;
+def : Proc<"arm8", []>;
+def : Proc<"arm810", []>;
+def : Proc<"strongarm", []>;
+def : Proc<"strongarm110", []>;
+def : Proc<"strongarm1100", []>;
+def : Proc<"strongarm1110", []>;
+
+// V4T Processors.
+def : Proc<"arm7tdmi", [ArchV4T]>;
+def : Proc<"arm7tdmi-s", [ArchV4T]>;
+def : Proc<"arm710t", [ArchV4T]>;
+def : Proc<"arm720t", [ArchV4T]>;
+def : Proc<"arm9", [ArchV4T]>;
+def : Proc<"arm9tdmi", [ArchV4T]>;
+def : Proc<"arm920", [ArchV4T]>;
+def : Proc<"arm920t", [ArchV4T]>;
+def : Proc<"arm922t", [ArchV4T]>;
+def : Proc<"arm940t", [ArchV4T]>;
+def : Proc<"ep9312", [ArchV4T]>;
+
+// V5T Processors.
+def : Proc<"arm10tdmi", [ArchV5T]>;
+def : Proc<"arm1020t", [ArchV5T]>;
+
+// V5TE Processors.
+def : Proc<"arm9e", [ArchV5TE]>;
+def : Proc<"arm926ej-s", [ArchV5TE]>;
+def : Proc<"arm946e-s", [ArchV5TE]>;
+def : Proc<"arm966e-s", [ArchV5TE]>;
+def : Proc<"arm968e-s", [ArchV5TE]>;
+def : Proc<"arm10e", [ArchV5TE]>;
+def : Proc<"arm1020e", [ArchV5TE]>;
+def : Proc<"arm1022e", [ArchV5TE]>;
+def : Proc<"xscale", [ArchV5TE]>;
+def : Proc<"iwmmxt", [ArchV5TE]>;
+
+// V6 Processors.
+def : Proc<"arm1136j-s", [ArchV6]>;
+def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>;
+def : Proc<"arm1176jz-s", [ArchV6]>;
+def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>;
+def : Proc<"mpcorenovfp", [ArchV6]>;
+def : Proc<"mpcore", [ArchV6, FeatureVFP2]>;
+
+def : Proc<"arm1156t2-s", [ArchV6, FeatureThumb2]>;
+def : Proc<"arm1156t2f-s", [ArchV6, FeatureThumb2, FeatureVFP2]>;
+
+def : Proc<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>;
+def : Proc<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "ARMRegisterInfo.td"
+
+include "ARMCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrInfo.td"
+
+def ARMInstrInfo : InstrInfo {
+ // Define how we want to layout our target-specific information field.
+ let TSFlagsFields = ["AddrModeBits",
+ "SizeFlag",
+ "IndexModeBits",
+ "isUnaryDataProc",
+ "Form"];
+ let TSFlagsShifts = [0,
+ 4,
+ 7,
+ 9,
+ 10];
+}
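+
+// With the shifts above, AddrModeBits occupies bits 0-3 of TSFlags, SizeFlag
+// bits 4-6, IndexModeBits bits 7-8, isUnaryDataProc bit 9, and Form the bits
+// from 10 upward (the field the code emitter masks with ARMII::FormMask).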
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def ARM : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = ARMInstrInfo;
+}
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
new file mode 100644
index 000000000000..6d9b9ee88000
--- /dev/null
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -0,0 +1,394 @@
+//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM addressing mode implementation stuff.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+
+namespace llvm {
+
+/// ARM_AM - ARM Addressing Mode Stuff
+namespace ARM_AM {
+ enum ShiftOpc {
+ no_shift = 0,
+ asr,
+ lsl,
+ lsr,
+ ror,
+ rrx
+ };
+
+ enum AddrOpc {
+ add = '+', sub = '-'
+ };
+
+ static inline const char *getShiftOpcStr(ShiftOpc Op) {
+ switch (Op) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return "asr";
+ case ARM_AM::lsl: return "lsl";
+ case ARM_AM::lsr: return "lsr";
+ case ARM_AM::ror: return "ror";
+ case ARM_AM::rrx: return "rrx";
+ }
+ }
+
+ static inline ShiftOpc getShiftOpcForNode(SDValue N) {
+ switch (N.getOpcode()) {
+ default: return ARM_AM::no_shift;
+ case ISD::SHL: return ARM_AM::lsl;
+ case ISD::SRL: return ARM_AM::lsr;
+ case ISD::SRA: return ARM_AM::asr;
+ case ISD::ROTR: return ARM_AM::ror;
+ //case ISD::ROTL: // Only if imm -> turn into ROTR.
+ // Can't handle RRX here, because it would require folding a flag into
+ // the addressing mode. :( This causes us to miss certain things.
+ //case ARMISD::RRX: return ARM_AM::rrx;
+ }
+ }
+
+ enum AMSubMode {
+ bad_am_submode = 0,
+ ia,
+ ib,
+ da,
+ db
+ };
+
+ static inline const char *getAMSubModeStr(AMSubMode Mode) {
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::ia: return "ia";
+ case ARM_AM::ib: return "ib";
+ case ARM_AM::da: return "da";
+ case ARM_AM::db: return "db";
+ }
+ }
+
+ static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) {
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::ia: return isLD ? "fd" : "ea";
+ case ARM_AM::ib: return isLD ? "ed" : "fa";
+ case ARM_AM::da: return isLD ? "fa" : "ed";
+ case ARM_AM::db: return isLD ? "ea" : "fd";
+ }
+ }
+
+ /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
+ ///
+ static inline unsigned rotr32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val >> Amt) | (Val << ((32-Amt)&31));
+ }
+
+ /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
+ ///
+ static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val << Amt) | (Val >> ((32-Amt)&31));
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #1: shift_operand with registers
+ //===--------------------------------------------------------------------===//
+ //
+ // This 'addressing mode' is used for arithmetic instructions. It can
+ // represent things like:
+ // reg
+ // reg [asr|lsl|lsr|ror|rrx] reg
+ // reg [asr|lsl|lsr|ror|rrx] imm
+ //
+  // This is stored as three operands [rega, regb, opc].  The first is the base
+  // register; the second is the shift-amount register (reg0 if the shift amount
+  // is an immediate or there is no shift); the third encodes the shift opcode
+  // and the immediate when no shift register is present.
+ //
+ static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+ return ShOp | (Imm << 3);
+ }
+ static inline unsigned getSORegOffset(unsigned Op) {
+ return Op >> 3;
+ }
+ static inline ShiftOpc getSORegShOp(unsigned Op) {
+ return (ShiftOpc)(Op & 7);
+ }
+
+ /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+ /// the 8-bit imm value.
+ static inline unsigned getSOImmValImm(unsigned Imm) {
+ return Imm & 0xFF;
+ }
+ /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+ /// the rotate amount.
+ static inline unsigned getSOImmValRot(unsigned Imm) {
+ return (Imm >> 8) * 2;
+ }
+
+ /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+ /// computing the rotate amount to use. If this immediate value cannot be
+ /// handled with a single shifter-op, determine a good rotate amount that will
+ /// take a maximal chunk of bits out of the immediate.
+ static inline unsigned getSOImmValRotate(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the rotate amount.
+ unsigned TZ = CountTrailingZeros_32(Imm);
+
+ // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
+ // not 9.
+ unsigned RotAmt = TZ & ~1;
+
+ // If we can handle this spread, return it.
+ if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+ return (32-RotAmt)&31; // HW rotates right, not left.
+
+ // For values like 0xF000000F, we should skip the first run of ones, then
+ // retry the hunt.
+ if (Imm & 1) {
+ unsigned TrailingOnes = CountTrailingZeros_32(~Imm);
+ if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF
+ // Restart the search for a high-order bit after the initial seconds of
+        // Restart the search for a high-order bit after the initial run of
+        // ones.
+
+ // Rotate amount must be even.
+ unsigned RotAmt2 = TZ2 & ~1;
+
+ // If this fits, use it.
+ if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0)
+ return (32-RotAmt2)&31; // HW rotates right, not left.
+ }
+ }
+
+ // Otherwise, we have no way to cover this span of bits with a single
+ // shifter_op immediate. Return a chunk of bits that will be useful to
+ // handle.
+ return (32-RotAmt)&31; // HW rotates right, not left.
+ }
+
+ /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into an shifter_operand immediate operand, return the 12-bit encoding for
+ /// it. If not, return -1.
+ static inline int getSOImmVal(unsigned Arg) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Arg & ~255U) == 0) return Arg;
+
+ unsigned RotAmt = getSOImmValRotate(Arg);
+
+ // If this cannot be handled with a single shifter_op, bail out.
+ if (rotr32(~255U, RotAmt) & Arg)
+ return -1;
+
+ // Encode this correctly.
+ return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+ }
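+
+  // Worked example of the encoding above: 0x3FC does not fit in 8 bits
+  // directly, but 0xFF rotated right by 30 gives 0x3FC, so
+  // getSOImmVal(0x3FC) == 0xFFF (imm8 = 0xFF in bits 0-7, rotate field
+  // 15 = 30/2 in bits 8-11).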
+
+ /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+ /// or'ing together two SOImmVal's.
+ static inline bool isSOImmTwoPartVal(unsigned V) {
+ // If this can be handled with a single shifter_op, bail out.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled with two shifter_op's, accept.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ return V == 0;
+ }
+
+ /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the first chunk of it.
+ static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+ return rotr32(255U, getSOImmValRotate(V)) & V;
+ }
+
+ /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the second chunk of it.
+ static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+ // Mask out the first hunk.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+
+ // Take what's left.
+ assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+ return V;
+ }
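+
+  // Worked example for the two-part helpers: 0x00FF00FF is not a single
+  // shifter_operand immediate, but isSOImmTwoPartVal(0x00FF00FF) is true and
+  // it splits into getSOImmTwoPartFirst() == 0xFF and
+  // getSOImmTwoPartSecond() == 0xFF0000, each individually encodable.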
+
+  /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImmValShift(unsigned Imm) {
+    // 8-bit (or smaller) immediates are trivially immediate operands with a
+    // shift of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+  /// by left shifting an 8-bit immediate.
+ static inline bool isThumbImmShiftedVal(unsigned V) {
+    // If this can be handled with an 8-bit immediate shifted left, masking
+    // off the shifted 8-bit field leaves nothing behind.
+ V = (~255U << getThumbImmValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImmNonShiftedVal - If V is a value that satisfies
+  /// isThumbImmShiftedVal, return the non-shifted value.
+ static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
+ return V >> getThumbImmValShift(V);
+ }
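+
+  // Worked example: 0x1FE0 == 0xFF << 5, so isThumbImmShiftedVal(0x1FE0) is
+  // true, getThumbImmValShift(0x1FE0) == 5, and
+  // getThumbImmNonShiftedVal(0x1FE0) == 0xFF.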
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #2
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for most simple load/store instructions.
+ //
+ // addrmode2 := reg +/- reg shop imm
+ // addrmode2 := reg +/- imm12
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 12, the immediate in bits 0-11, and the shift op in 13-15.
+ //
+ // If this addressing mode is a frame index (before prolog/epilog insertion
+ // and code rewriting), this operand will have the form: FI#, reg0, <offs>
+ // with no shift amount for the frame offset.
+ //
+ static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
+ assert(Imm12 < (1 << 12) && "Imm too large!");
+ bool isSub = Opc == sub;
+ return Imm12 | ((int)isSub << 12) | (SO << 13);
+ }
+ static inline unsigned getAM2Offset(unsigned AM2Opc) {
+ return AM2Opc & ((1 << 12)-1);
+ }
+ static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+ return ((AM2Opc >> 12) & 1) ? sub : add;
+ }
+ static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+ return (ShiftOpc)(AM2Opc >> 13);
+ }
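+
+  // Worked example of the packing above: getAM2Opc(sub, 4, lsl) == 0x5004
+  // (imm12 = 4 in bits 0-11, sub bit 12 set, lsl = 2 in bits 13-15); the
+  // getAM2* accessors recover the offset, the add/sub flag and the shift opc.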
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #3
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for sign-extending loads, and load/store-pair instructions.
+ //
+ // addrmode3 := reg +/- reg
+ // addrmode3 := reg +/- imm8
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7.
+
+ /// getAM3Opc - This function encodes the addrmode3 opc field.
+ static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+ return AM3Opc & 0xFF;
+ }
+ static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+ return ((AM3Opc >> 8) & 1) ? sub : add;
+ }
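+
+  // Worked example: getAM3Opc(sub, 8) == 0x108 (offset 8 in bits 0-7, sub bit
+  // 8 set); getAM3Offset(0x108) == 8 and getAM3Op(0x108) == sub.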
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #4
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for load / store multiple instructions.
+ //
+ // addrmode4 := reg, <mode>
+ //
+ // The four modes are:
+ // IA - Increment after
+ // IB - Increment before
+ // DA - Decrement after
+ // DB - Decrement before
+ //
+  // If the 4th bit (writeback) is set, then the base register is updated after
+ // the memory transfer.
+
+ static inline AMSubMode getAM4SubMode(unsigned Mode) {
+ return (AMSubMode)(Mode & 0x7);
+ }
+
+ static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) {
+ return (int)SubMode | ((int)WB << 3);
+ }
+
+ static inline bool getAM4WBFlag(unsigned Mode) {
+ return (Mode >> 3) & 1;
+ }
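+
+  // Worked example: getAM4ModeImm(db, true) == 0xC (sub-mode db = 4 in bits
+  // 0-2, writeback bit 3 set); getAM4SubMode(0xC) == db and
+  // getAM4WBFlag(0xC) is true.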
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #5
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for coprocessor instructions, such as FP load/stores.
+ //
+ // addrmode5 := reg +/- imm8*4
+ //
+ // The first operand is always a Reg. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7.
+ //
+ // This can also be used for FP load/store multiple ops. The third field encodes
+ // writeback mode in bit 8, the number of registers (or 2 times the number of
+  // registers for DPR ops) in bits 0-7. In addition, bits 9-11 encode one of the
+ // following two sub-modes:
+ //
+ // IA - Increment after
+ // DB - Decrement before
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field.
+ static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+ return AM5Opc & 0xFF;
+ }
+ static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1) ? sub : add;
+ }
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and
+ /// FSTM instructions.
+ static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB,
+ unsigned char Offset) {
+ assert((SubMode == ia || SubMode == db) &&
+ "Illegal addressing mode 5 sub-mode!");
+ return ((int)SubMode << 9) | ((int)WB << 8) | Offset;
+ }
+ static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
+ return (AMSubMode)((AM5Opc >> 9) & 0x7);
+ }
+ static inline bool getAM5WBFlag(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1);
+ }
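+
+  // Worked example for the FLDM/FSTM form: getAM5Opc(db, true, 4) == 0x904
+  // (offset 4, i.e. two d-registers for a DPR op, writeback bit 8 set,
+  // sub-mode db = 4 in bits 9-11); the getAM5* accessors recover each field.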
+
+} // end namespace ARM_AM
+} // end namespace llvm
+
+#endif
+
diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h
new file mode 100644
index 000000000000..3b38375fbc71
--- /dev/null
+++ b/lib/Target/ARM/ARMBuildAttrs.h
@@ -0,0 +1,64 @@
+//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains enumerations and support routines for ARM build attributes
+// as defined in the ARM ABI addenda document (ABI release 2.07).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __TARGET_ARMBUILDATTRS_H__
+#define __TARGET_ARMBUILDATTRS_H__
+
+namespace ARMBuildAttrs {
+ enum {
+ File = 1,
+ Section = 2,
+ Symbol = 3,
+ CPU_raw_name = 4,
+ CPU_name = 5,
+ CPU_arch = 6,
+ CPU_arch_profile = 7,
+ ARM_ISA_use = 8,
+ THUMB_ISA_use = 9,
+ VFP_arch = 10,
+ WMMX_arch = 11,
+ Advanced_SIMD_arch = 12,
+ PCS_config = 13,
+ ABI_PCS_R9_use = 14,
+ ABI_PCS_RW_data = 15,
+ ABI_PCS_RO_data = 16,
+ ABI_PCS_GOT_use = 17,
+ ABI_PCS_wchar_t = 18,
+ ABI_FP_rounding = 19,
+ ABI_FP_denormal = 20,
+ ABI_FP_exceptions = 21,
+ ABI_FP_user_exceptions = 22,
+ ABI_FP_number_model = 23,
+ ABI_align8_needed = 24,
+ ABI_align8_preserved = 25,
+ ABI_enum_size = 26,
+ ABI_HardFP_use = 27,
+ ABI_VFP_args = 28,
+ ABI_WMMX_args = 29,
+ ABI_optimization_goals = 30,
+ ABI_FP_optimization_goals = 31,
+ compatibility = 32,
+ CPU_unaligned_access = 34,
+ VFP_HP_extension = 36,
+ ABI_FP_16bit_format = 38,
+ nodefaults = 64,
+ also_compatible_with = 65,
+ T2EE_use = 66,
+ conformance = 67,
+ Virtualization_use = 68,
+ MPextension_use = 70
+ };
+}
+
+#endif // __TARGET_ARMBUILDATTRS_H__
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
new file mode 100644
index 000000000000..6cd786eed4d5
--- /dev/null
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -0,0 +1,87 @@
+//===- ARMCallingConv.td - Calling Conventions for ARM ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the ARM architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<ARMSubtarget>().", F), A>;
+
+/// CCIfAlign - Match if the original alignment of the argument is Align.
+class CCIfAlign<string Align, CCAction A>:
+ CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_ARM_APCS : CallingConv<[
+
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // f64 is passed in pairs of GPRs, possibly split onto the stack
+ CCIfType<[f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 4>>
+]>;
+
+def RetCC_ARM_APCS : CallingConv<[
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
+
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_ARM_AAPCS : CallingConv<[
+
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // i64/f64 is passed in even pairs of GPRs
+ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
+ // (and the same is true for f64 if VFP is not enabled)
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
+ CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
+
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
+ "ArgFlags.getOrigAlign() != 8",
+ CCAssignToReg<[R0, R1, R2, R3]>>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToStack<8, 8>>
+]>;
+
+def RetCC_ARM_AAPCS : CallingConv<[
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM Calling Convention Dispatch
+//===----------------------------------------------------------------------===//
+
+def CC_ARM : CallingConv<[
+ CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>,
+ CCDelegateTo<CC_ARM_APCS>
+]>;
+
+def RetCC_ARM : CallingConv<[
+ CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>,
+ CCDelegateTo<RetCC_ARM_APCS>
+]>;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
new file mode 100644
index 000000000000..44fac12019b3
--- /dev/null
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -0,0 +1,1411 @@
+//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the ARM machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMInstrInfo.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+ class ARMCodeEmitter {
+ public:
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI);
+ };
+
+ template<class CodeEmitter>
+ class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass,
+ public ARMCodeEmitter {
+ ARMJITInfo *JTI;
+ const ARMInstrInfo *II;
+ const TargetData *TD;
+ TargetMachine &TM;
+ CodeEmitter &MCE;
+ const std::vector<MachineConstantPoolEntry> *MCPEs;
+ const std::vector<MachineJumpTableEntry> *MJTEs;
+ bool IsPIC;
+
+ public:
+ static char ID;
+ explicit Emitter(TargetMachine &tm, CodeEmitter &mce)
+ : MachineFunctionPass(&ID), JTI(0), II(0), TD(0), TM(tm),
+ MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+ Emitter(TargetMachine &tm, CodeEmitter &mce,
+ const ARMInstrInfo &ii, const TargetData &td)
+ : MachineFunctionPass(&ID), JTI(0), II(&ii), TD(&td), TM(tm),
+ MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+
+ void emitWordLE(unsigned Binary);
+
+ void emitDWordLE(uint64_t Binary);
+
+ void emitConstPoolInstruction(const MachineInstr &MI);
+
+ void emitMOVi2piecesInstruction(const MachineInstr &MI);
+
+ void emitLEApcrelJTInstruction(const MachineInstr &MI);
+
+ void emitPseudoMoveInstruction(const MachineInstr &MI);
+
+ void addPCLabel(unsigned LabelID);
+
+ void emitPseudoInstruction(const MachineInstr &MI);
+
+ unsigned getMachineSoRegOpValue(const MachineInstr &MI,
+ const TargetInstrDesc &TID,
+ const MachineOperand &MO,
+ unsigned OpIdx);
+
+ unsigned getMachineSoImmOpValue(unsigned SoImm);
+
+ unsigned getAddrModeSBit(const MachineInstr &MI,
+ const TargetInstrDesc &TID) const;
+
+ void emitDataProcessingInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRd = 0,
+ unsigned ImplicitRn = 0);
+
+ void emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn = 0);
+
+ void emitLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitMulFrmInstruction(const MachineInstr &MI);
+
+ void emitExtendInstruction(const MachineInstr &MI);
+
+ void emitMiscArithInstruction(const MachineInstr &MI);
+
+ void emitBranchInstruction(const MachineInstr &MI);
+
+ void emitInlineJumpTable(unsigned JTIndex);
+
+ void emitMiscBranchInstruction(const MachineInstr &MI);
+
+ void emitVFPArithInstruction(const MachineInstr &MI);
+
+ void emitVFPConversionInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreInstruction(const MachineInstr &MI);
+
+ void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+ void emitMiscInstruction(const MachineInstr &MI);
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO);
+ unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) {
+ return getMachineOpValue(MI, MI.getOperand(OpIdx));
+ }
+
+ /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+ ///
+ unsigned getShiftOp(unsigned Imm) const ;
+
+    /// Routines that handle operands which add machine relocations to be
+    /// fixed up by the relocation stage.
+ void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
+ bool NeedStub, intptr_t ACPV = 0);
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc);
+ void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc);
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
+ intptr_t JTBase = 0);
+ };
+ template <class CodeEmitter>
+ char Emitter<CodeEmitter>::ID = 0;
+}
+
+/// createARMCodeEmitterPass - Return a pass that emits the collected ARM code
+/// to the specified MCE object.
+
+namespace llvm {
+
+FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+ MachineCodeEmitter &MCE) {
+ return new Emitter<MachineCodeEmitter>(TM, MCE);
+}
+FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new Emitter<JITCodeEmitter>(TM, JCE);
+}
+
+} // end namespace llvm
+
+template<class CodeEmitter>
+bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
+  assert((MF.getTarget().getRelocationModel() == Reloc::Default ||
+          MF.getTarget().getRelocationModel() == Reloc::Static) &&
+         "JIT relocation model must be set to static or default!");
+ II = ((ARMTargetMachine&)MF.getTarget()).getInstrInfo();
+ TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData();
+ JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo();
+ MCPEs = &MF.getConstantPool()->getConstants();
+ MJTEs = &MF.getJumpTableInfo()->getJumpTables();
+ IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+ JTI->Initialize(MF, IsPIC);
+
+ do {
+ DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n";
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I)
+ emitInstruction(*I);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+///
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getShiftOp(unsigned Imm) const {
+ switch (ARM_AM::getAM2ShiftOpc(Imm)) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return 2;
+ case ARM_AM::lsl: return 0;
+ case ARM_AM::lsr: return 1;
+ case ARM_AM::ror:
+ case ARM_AM::rrx: return 3;
+ }
+ return 0;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) {
+ if (MO.isReg())
+ return ARMRegisterInfo::getRegisterNumbering(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
+ else if (MO.isCPI()) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ // For VFP load, the immediate offset is multiplied by 4.
+ unsigned Reloc = ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
+ ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
+ emitConstPoolAddress(MO.getIndex(), Reloc);
+ } else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
+ else {
+ cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
+ abort();
+ }
+ return 0;
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
+ bool NeedStub, intptr_t ACPV) {
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ GV, ACPV, NeedStub));
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES,
+ unsigned Reloc) {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES));
+}
+
+/// emitConstPoolAddress - Arrange for the address of a constant pool entry
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI,
+ unsigned Reloc) {
+ // Tell JIT emitter we'll resolve the address.
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, 0, true));
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTIndex,
+ unsigned Reloc) {
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTIndex, 0, true));
+}
+
+/// emitMachineBasicBlock - Emit the specified address basic block.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMachineBasicBlock(MachineBasicBlock *BB,
+ unsigned Reloc, intptr_t JTBase) {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, BB, JTBase));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitWordLE(unsigned Binary) {
+#ifndef NDEBUG
+ DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0')
+ << Binary << std::dec << "\n";
+#endif
+ MCE.emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitDWordLE(uint64_t Binary) {
+#ifndef NDEBUG
+ DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0')
+ << (unsigned)Binary << std::dec << "\n";
+ DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0')
+ << (unsigned)(Binary >> 32) << std::dec << "\n";
+#endif
+ MCE.emitDWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI) {
+ DOUT << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI;
+
+ NumEmitted++; // Keep track of the # of mi's emitted
+ switch (MI.getDesc().TSFlags & ARMII::FormMask) {
+ default: {
+ assert(0 && "Unhandled instruction encoding format!");
+ break;
+ }
+ case ARMII::Pseudo:
+ emitPseudoInstruction(MI);
+ break;
+ case ARMII::DPFrm:
+ case ARMII::DPSoRegFrm:
+ emitDataProcessingInstruction(MI);
+ break;
+ case ARMII::LdFrm:
+ case ARMII::StFrm:
+ emitLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdMiscFrm:
+ case ARMII::StMiscFrm:
+ emitMiscLoadStoreInstruction(MI);
+ break;
+ case ARMII::LdStMulFrm:
+ emitLoadStoreMultipleInstruction(MI);
+ break;
+ case ARMII::MulFrm:
+ emitMulFrmInstruction(MI);
+ break;
+ case ARMII::ExtFrm:
+ emitExtendInstruction(MI);
+ break;
+ case ARMII::ArithMiscFrm:
+ emitMiscArithInstruction(MI);
+ break;
+ case ARMII::BrFrm:
+ emitBranchInstruction(MI);
+ break;
+ case ARMII::BrMiscFrm:
+ emitMiscBranchInstruction(MI);
+ break;
+ // VFP instructions.
+ case ARMII::VFPUnaryFrm:
+ case ARMII::VFPBinaryFrm:
+ emitVFPArithInstruction(MI);
+ break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ emitVFPConversionInstruction(MI);
+ break;
+ case ARMII::VFPLdStFrm:
+ emitVFPLoadStoreInstruction(MI);
+ break;
+ case ARMII::VFPLdStMulFrm:
+ emitVFPLoadStoreMultipleInstruction(MI);
+ break;
+ case ARMII::VFPMiscFrm:
+ emitMiscInstruction(MI);
+ break;
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstPoolInstruction(const MachineInstr &MI) {
+ unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index.
+ unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index.
+ const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex];
+
+ // Remember the CONSTPOOL_ENTRY address for later relocation.
+ JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue());
+
+ // Emit constpool island entry. In most cases, the actual values will be
+ // resolved and relocated after code emission.
+ if (MCPE.isMachineConstantPoolEntry()) {
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ DOUT << " ** ARM constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n';
+
+ GlobalValue *GV = ACPV->getGV();
+ if (GV) {
+ assert(!ACPV->isStub() && "Don't know how to deal with this yet!");
+ if (ACPV->isNonLazyPointer())
+ MCE.addRelocation(MachineRelocation::getIndirectSymbol(
+ MCE.getCurrentPCOffset(), ARM::reloc_arm_machine_cp_entry, GV,
+ (intptr_t)ACPV, false));
+ else
+ emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
+ ACPV->isStub() || isa<Function>(GV), (intptr_t)ACPV);
+ } else {
+ assert(!ACPV->isNonLazyPointer() && "Don't know how to deal with this yet!");
+ emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute);
+ }
+ emitWordLE(0);
+ } else {
+ Constant *CV = MCPE.Val.ConstVal;
+
+#ifndef NDEBUG
+ DOUT << " ** Constant pool #" << CPI << " @ "
+ << (void*)MCE.getCurrentPCValue() << " ";
+ if (const Function *F = dyn_cast<Function>(CV))
+ DOUT << F->getName();
+ else
+ DOUT << *CV;
+ DOUT << '\n';
+#endif
+
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV));
+ emitWordLE(0);
+ } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ uint32_t Val = *(uint32_t*)CI->getValue().getRawData();
+ emitWordLE(Val);
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ if (CFP->getType() == Type::FloatTy)
+ emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else if (CFP->getType() == Type::DoubleTy)
+ emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+ else {
+ assert(0 && "Unable to handle this constantpool entry!");
+ abort();
+ }
+ } else {
+ assert(0 && "Unable to handle this constantpool entry!");
+ abort();
+ }
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMOVi2piecesInstruction(const MachineInstr &MI) {
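+ // MOVi2pieces materializes an immediate that cannot be encoded as a single
+ // so_imm: emit a 'mov' of the first piece, then 'orr' the second piece into
+ // the same destination register.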
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+ assert(MO1.isImm() && "Not a valid so_imm value!");
+ unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm());
+ unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm());
+
+ // Emit the 'mov' instruction.
+ unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V1));
+ emitWordLE(Binary);
+
+ // Now the 'orr' instruction.
+ Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift;
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V2));
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitLEApcrelJTInstruction(const MachineInstr &MI) {
+ // It's basically add r, pc, (LJTI - $+8)
+
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Emit the 'add' instruction.
+ unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, TID);
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode Rn which is PC.
+ Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+
+ // Encode the displacement.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>.
+ Binary |= 1 << ARMII::I_BitShift;
+ emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base);
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitPseudoMoveInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag)
+ Binary |= 1 << ARMII::S_BitShift;
+
+ // Encode register def if there is one.
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode the shift operation.
+ switch (Opcode) {
+ default: break;
+ case ARM::MOVrx:
+ // rrx
+ Binary |= 0x6 << 4;
+ break;
+ case ARM::MOVsrl_flag:
+ // lsr #1
+ Binary |= (0x2 << 4) | (1 << 7);
+ break;
+ case ARM::MOVsra_flag:
+ // asr #1
+ Binary |= (0x4 << 4) | (1 << 7);
+ break;
+ }
+
+ // Encode register Rm.
+ Binary |= getMachineOpValue(MI, 1);
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::addPCLabel(unsigned LabelID) {
+ DOUT << " ** LPC" << LabelID << " @ "
+ << (void*)MCE.getCurrentPCValue() << '\n';
+ JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue());
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
+ switch (Opcode) {
+ default:
+ abort(); // FIXME:
+ case TargetInstrInfo::INLINEASM: {
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI.getOperand(0).getSymbolName()[0]) {
+ assert(0 && "JIT does not support inline asm!\n");
+ abort();
+ }
+ break;
+ }
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getImm());
+ break;
+ case TargetInstrInfo::IMPLICIT_DEF:
+ case TargetInstrInfo::DECLARE:
+ case ARM::DWARF_LOC:
+ // Do nothing.
+ break;
+ case ARM::CONSTPOOL_ENTRY:
+ emitConstPoolInstruction(MI);
+ break;
+ case ARM::PICADD: {
+ // Remember the address of the PC label for later relocation.
+ addPCLabel(MI.getOperand(2).getImm());
+ // PICADD is just an add instruction that implicitly reads the PC.
+ emitDataProcessingInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDR:
+ case ARM::PICLDRB:
+ case ARM::PICSTR:
+ case ARM::PICSTRB: {
+ // Remember the address of the PC label for later relocation.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read the PC.
+ emitLoadStoreInstruction(MI, 0, ARM::PC);
+ break;
+ }
+ case ARM::PICLDRH:
+ case ARM::PICLDRSH:
+ case ARM::PICLDRSB:
+ case ARM::PICSTRH: {
+ // Remember the address of the PC label for later relocation.
+ addPCLabel(MI.getOperand(2).getImm());
+ // These are just load / store instructions that implicitly read the PC.
+ emitMiscLoadStoreInstruction(MI, ARM::PC);
+ break;
+ }
+ case ARM::MOVi2pieces:
+ // Two instructions to materialize a constant.
+ emitMOVi2piecesInstruction(MI);
+ break;
+ case ARM::LEApcrelJT:
+ // Materialize jumptable address.
+ emitLEApcrelJTInstruction(MI);
+ break;
+ case ARM::MOVrx:
+ case ARM::MOVsrl_flag:
+ case ARM::MOVsra_flag:
+ emitPseudoMoveInstruction(MI);
+ break;
+ }
+}
+
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getMachineSoRegOpValue(
+ const MachineInstr &MI,
+ const TargetInstrDesc &TID,
+ const MachineOperand &MO,
+ unsigned OpIdx) {
+ unsigned Binary = getMachineOpValue(MI, MO);
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx + 1);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx + 2);
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+ // Encode the shift opcode.
+ unsigned SBits = 0;
+ unsigned Rs = MO1.getReg();
+ if (Rs) {
+ // Set shift operand (bit[7:4]).
+ // LSL - 0001
+ // LSR - 0011
+ // ASR - 0101
+ // ROR - 0111
+ // RRX - 0110 and bit[11:8] clear.
+ switch (SOpc) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x1; break;
+ case ARM_AM::lsr: SBits = 0x3; break;
+ case ARM_AM::asr: SBits = 0x5; break;
+ case ARM_AM::ror: SBits = 0x7; break;
+ case ARM_AM::rrx: SBits = 0x6; break;
+ }
+ } else {
+ // Set shift operand (bit[6:4]).
+ // LSL - 000
+ // LSR - 010
+ // ASR - 100
+ // ROR - 110
+ switch (SOpc) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::lsl: SBits = 0x0; break;
+ case ARM_AM::lsr: SBits = 0x2; break;
+ case ARM_AM::asr: SBits = 0x4; break;
+ case ARM_AM::ror: SBits = 0x6; break;
+ }
+ }
+ Binary |= SBits << 4;
+ if (SOpc == ARM_AM::rrx)
+ return Binary;
+
+ // Encode the shift operation Rs or shift_imm (except rrx).
+ if (Rs) {
+ // Encode Rs bit[11:8].
+ assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+ return Binary |
+ (ARMRegisterInfo::getRegisterNumbering(Rs) << ARMII::RegRsShift);
+ }
+
+ // Encode shift_imm bit[11:7].
+ return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+}
+
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getMachineSoImmOpValue(unsigned SoImm) {
+ // Encode rotate_imm.
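+ // An so_imm is an 8-bit value rotated right by an even amount; rotate_imm
+ // stores half of that rotation, hence the shift right by one here.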
+ unsigned Binary = (ARM_AM::getSOImmValRot(SoImm) >> 1)
+ << ARMII::SoRotImmShift;
+
+ // Encode immed_8.
+ Binary |= ARM_AM::getSOImmValImm(SoImm);
+ return Binary;
+}
+
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getAddrModeSBit(const MachineInstr &MI,
+ const TargetInstrDesc &TID) const {
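+ // Scan the operands beyond the descriptor's explicit list (the implicit
+ // defs); if one of them defines CPSR, the instruction updates the flags and
+ // the S bit must be encoded.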
+ for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){
+ const MachineOperand &MO = MI.getOperand(i-1);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
+ return 1 << ARMII::S_BitShift;
+ }
+ return 0;
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitDataProcessingInstruction(
+ const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, TID);
+
+ // Encode register def if there is one.
+ unsigned NumDefs = TID.getNumDefs();
+ unsigned OpIdx = 0;
+ if (NumDefs)
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+ else if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd)
+ << ARMII::RegRdShift);
+
+ // If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode first non-shifter register operand if there is one.
+ bool isUnary = TID.TSFlags & ARMII::UnaryDP;
+ if (!isUnary) {
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn)
+ << ARMII::RegRnShift);
+ else {
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
+ ++OpIdx;
+ }
+ }
+
+ // Encode shifter operand.
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if ((TID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
+ // Encode SoReg.
+ emitWordLE(Binary | getMachineSoRegOpValue(MI, TID, MO, OpIdx));
+ return;
+ }
+
+ if (MO.isReg()) {
+ // Encode register Rm.
+ emitWordLE(Binary | ARMRegisterInfo::getRegisterNumbering(MO.getReg()));
+ return;
+ }
+
+ // Encode so_imm.
+ // Set bit I(25) to identify this is the immediate form of <shifter_op>.
+ Binary |= 1 << ARMII::I_BitShift;
+ Binary |= getMachineSoImmOpValue(MO.getImm());
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitLoadStoreInstruction(
+ const MachineInstr &MI,
+ unsigned ImplicitRd,
+ unsigned ImplicitRn) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Form = TID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ if (ImplicitRd)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd)
+ << ARMII::RegRdShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn)
+ << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // If this is a two-address operand, skip it. e.g. LDR_PRE.
+ if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+
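+ // The next two operands are the offset register (which may be 0) and the
+ // packed addrmode2 immediate (add/sub flag, offset, and shift opcode). PIC
+ // loads through the PC carry no offset operand, so use 0 in that case.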
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM2Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative).
+ Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+ if (!MO2.getReg()) { // is immediate
+ if (ARM_AM::getAM2Offset(AM2Opc))
+ // Set the value of offset_12 field
+ Binary |= ARM_AM::getAM2Offset(AM2Opc);
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Set bit I(25), because this is not the immediate encoding.
+ Binary |= 1 << ARMII::I_BitShift;
+ assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
+ // Set bit[3:0] to the corresponding Rm register
+ Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg());
+
+ // If this instruction uses a scaled register offset/index, set the
+ // shift_immed (bit[11:7]) and shift (bit[6:5]) fields.
+ if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) {
+ Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift
+ Binary |= ShImm << ARMII::ShiftShift; // shift_immed
+ }
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMiscLoadStoreInstruction(const MachineInstr &MI,
+ unsigned ImplicitRn) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Form = TID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Operand 0 of a pre- and post-indexed store is the address base
+ // writeback. Skip it.
+ bool Skipped = false;
+ if (IsPrePost && Form == ARMII::StMiscFrm) {
+ ++OpIdx;
+ Skipped = true;
+ }
+
+ // Set first operand
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ // Set second operand
+ if (ImplicitRn)
+ // Special handling for implicit use (e.g. PC).
+ Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn)
+ << ARMII::RegRnShift);
+ else
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+ // If this is a two-address operand, skip it. e.g. LDRH_POST.
+ if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ unsigned AM3Opc = (ImplicitRn == ARM::PC)
+ ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+ // Set bit U(23) according to sign of immed value (positive or negative)
+ Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) <<
+ ARMII::U_BitShift);
+
+ // If this instruction uses the register offset/index encoding, set bit[3:0]
+ // to the corresponding Rm register.
+ if (MO2.getReg()) {
+ Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg());
+ emitWordLE(Binary);
+ return;
+ }
+
+ // This instruction uses the immediate offset/index encoding; set bit 22 to 1.
+ Binary |= 1 << ARMII::AM3_I_BitShift;
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) {
+ // Set operands
+ Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH
+ Binary |= (ImmOffs & 0xF); // immedL
+ }
+
+ emitWordLE(Binary);
+}
+
+static unsigned getAddrModeUPBits(unsigned Mode) {
+ unsigned Binary = 0;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ // IA - Increment after - bit U = 1 and bit P = 0
+ // IB - Increment before - bit U = 1 and bit P = 1
+ // DA - Decrement after - bit U = 0 and bit P = 0
+ // DB - Decrement before - bit U = 0 and bit P = 1
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::da: break;
+ case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break;
+ case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break;
+ case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break;
+ }
+
+ return Binary;
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitLoadStoreMultipleInstruction(
+ const MachineInstr &MI) {
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ const MachineOperand &MO = MI.getOperand(1);
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
+
+ // Set bit W(21)
+ if (ARM_AM::getAM4WBFlag(MO.getImm()))
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // Set registers
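+ // The register list starts at operand 4 (the base register, AM4 sub-mode
+ // immediate, and predicate operands occupy slots 0-3); each register sets
+ // its corresponding bit in the 16-bit register list field.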
+ for (unsigned i = 4, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(MO.getReg());
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ RegNum < 16);
+ Binary |= 0x1 << RegNum;
+ }
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMulFrmInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode S bit if MI modifies CPSR.
+ Binary |= getAddrModeSBit(MI, TID);
+
+ // 32x32->64bit operations have two destination registers. The number
+ // of register definitions will tell us if that's what we're dealing with.
+ unsigned OpIdx = 0;
+ if (TID.getNumDefs() == 2)
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdLoShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode Rs
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift;
+
+ // Many multiply instructions (e.g. MLA) have three source operands. Encode
+ // the third one as Rn (for multiply, that occupies the same bits as RdLo).
+ if (TID.getNumOperands() > OpIdx &&
+ !TID.OpInfo[OpIdx].isPredicate() &&
+ !TID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift;
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitExtendInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO1 = MI.getOperand(OpIdx++);
+ const MachineOperand &MO2 = MI.getOperand(OpIdx);
+ if (MO2.isReg()) {
+ // Two register operand form.
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, MO2);
+ ++OpIdx;
+ } else {
+ Binary |= getMachineOpValue(MI, MO1);
+ }
+
+ // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand.
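+ // The 2-bit rotate field holds the rotation amount divided by 8.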
+ if (MI.getOperand(OpIdx).isImm() &&
+ !TID.OpInfo[OpIdx].isPredicate() &&
+ !TID.OpInfo[OpIdx].isOptionalDef())
+ Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift;
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMiscArithInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+ const MachineOperand &MO = MI.getOperand(OpIdx++);
+ if (OpIdx == TID.getNumOperands() ||
+ TID.OpInfo[OpIdx].isPredicate() ||
+ TID.OpInfo[OpIdx].isOptionalDef()) {
+ // Encode Rm and it's done.
+ Binary |= getMachineOpValue(MI, MO);
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Rn.
+ Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift;
+
+ // Encode Rm.
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ // Encode shift_imm.
+ unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitBranchInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ if (TID.Opcode == ARM::TPsoft)
+ abort(); // FIXME
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Set signed_immed_24 field
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitInlineJumpTable(unsigned JTIndex) {
+ // Remember the base address of the inline jump table.
+ uintptr_t JTBase = MCE.getCurrentPCValue();
+ JTI->addJumpTableBaseAddr(JTIndex, JTBase);
+ DOUT << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase << '\n';
+
+ // Now emit the jump table entries.
+ const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ if (IsPIC)
+ // DestBB address - JT base.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase);
+ else
+ // Absolute DestBB address.
+ emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute);
+ emitWordLE(0);
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMiscBranchInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Handle jump tables.
+ if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) {
+ // First emit a ldr pc, [] instruction.
+ emitDataProcessingInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ unsigned JTIndex = (TID.Opcode == ARM::BR_JTr)
+ ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
+ emitInlineJumpTable(JTIndex);
+ return;
+ } else if (TID.Opcode == ARM::BR_JTm) {
+ // First emit a ldr pc, [] instruction.
+ emitLoadStoreInstruction(MI, ARM::PC);
+
+ // Then emit the inline jump table.
+ emitInlineJumpTable(MI.getOperand(3).getIndex());
+ return;
+ }
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ if (TID.Opcode == ARM::BX_RET)
+ // The return register is LR.
+ Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR);
+ else
+ // Otherwise, encode the branch target register operand.
+ Binary |= getMachineOpValue(MI, 0);
+
+ emitWordLE(Binary);
+}
+
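+// Helpers for encoding VFP register operands. A double-precision register
+// number fits entirely in the 4-bit Dd/Dn/Dm field; a single-precision
+// register stores its upper four bits in that field and its low bit in the
+// extra D/N/M bit.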
+static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = false;
+ RegD = ARMRegisterInfo::getRegisterNumbering(RegD, isSPVFP);
+ if (!isSPVFP)
+ Binary |= RegD << ARMII::RegRdShift;
+ else {
+ Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift;
+ Binary |= (RegD & 0x01) << ARMII::D_BitShift;
+ }
+ return Binary;
+}
+
+static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = false;
+ RegN = ARMRegisterInfo::getRegisterNumbering(RegN, isSPVFP);
+ if (!isSPVFP)
+ Binary |= RegN << ARMII::RegRnShift;
+ else {
+ Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift;
+ Binary |= (RegN & 0x01) << ARMII::N_BitShift;
+ }
+ return Binary;
+}
+
+static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ bool isSPVFP = false;
+ RegM = ARMRegisterInfo::getRegisterNumbering(RegM, isSPVFP);
+ if (!isSPVFP)
+ Binary |= RegM;
+ else {
+ Binary |= ((RegM & 0x1E) >> 1);
+ Binary |= (RegM & 0x01) << ARMII::M_BitShift;
+ }
+ return Binary;
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVFPArithInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+ assert((Binary & (1 << ARMII::D_BitShift)) == 0 &&
+ (Binary & (1 << ARMII::N_BitShift)) == 0 &&
+ (Binary & (1 << ARMII::M_BitShift)) == 0 && "VFP encoding bug!");
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // If this is a two-address operand, skip it, e.g. FMACD.
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+
+ // Encode Dn / Sn.
+ if ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
+ Binary |= encodeVFPRn(MI, OpIdx++);
+
+ if (OpIdx == TID.getNumOperands() ||
+ TID.OpInfo[OpIdx].isPredicate() ||
+ TID.OpInfo[OpIdx].isOptionalDef()) {
+ // FCMPEZD etc. has only one operand.
+ emitWordLE(Binary);
+ return;
+ }
+
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, OpIdx);
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVFPConversionInstruction(
+ const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Form = TID.TSFlags & ARMII::FormMask;
+
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 0);
+ break;
+ case ARMII::VFPConv4Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 0);
+ break;
+ case ARMII::VFPConv5Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 0);
+ break;
+ }
+
+ switch (Form) {
+ default: break;
+ case ARMII::VFPConv1Frm:
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 1);
+ break;
+ case ARMII::VFPConv2Frm:
+ case ARMII::VFPConv3Frm:
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 1);
+ break;
+ case ARMII::VFPConv4Frm:
+ case ARMII::VFPConv5Frm:
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 1);
+ break;
+ }
+
+ if (Form == ARMII::VFPConv5Frm)
+ // Encode Dn / Sn.
+ Binary |= encodeVFPRn(MI, 2);
+ else if (Form == ARMII::VFPConv3Frm)
+ // Encode Dm / Sm.
+ Binary |= encodeVFPRm(MI, 2);
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ unsigned OpIdx = 0;
+
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, OpIdx++);
+
+ // Encode address base.
+ const MachineOperand &Base = MI.getOperand(OpIdx++);
+ Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift;
+
+ // If there is a non-zero immediate offset, encode it.
+ if (Base.isReg()) {
+ const MachineOperand &Offset = MI.getOperand(OpIdx);
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) {
+ if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add)
+ Binary |= 1 << ARMII::U_BitShift;
+ Binary |= ImmOffs;
+ emitWordLE(Binary);
+ return;
+ }
+ }
+
+ // If immediate offset is omitted, default to +0.
+ Binary |= 1 << ARMII::U_BitShift;
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVFPLoadStoreMultipleInstruction(
+ const MachineInstr &MI) {
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Set base address operand
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRnShift;
+
+ // Set addressing mode by modifying bits U(23) and P(24)
+ const MachineOperand &MO = MI.getOperand(1);
+ Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm()));
+
+ // Set bit W(21)
+ if (ARM_AM::getAM5WBFlag(MO.getImm()))
+ Binary |= 0x1 << ARMII::W_BitShift;
+
+ // First register is encoded in Dd.
+ Binary |= encodeVFPRd(MI, 4);
+
+ // The number of registers is encoded in the offset field.
+ unsigned NumRegs = 1;
+ for (unsigned i = 5, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ break;
+ ++NumRegs;
+ }
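+ // The offset field holds the transfer length in words; a double-precision
+ // register occupies two words, hence 2 * NumRegs.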
+ Binary |= NumRegs * 2;
+
+ emitWordLE(Binary);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMiscInstruction(const MachineInstr &MI) {
+ // Part of the binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ emitWordLE(Binary);
+}
+
+#include "ARMGenCodeEmitter.inc"
+
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
new file mode 100644
index 000000000000..db723fe8325e
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -0,0 +1,1285 @@
+//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered throughout the function. This is required due to the
+// limited pc-relative displacements that ARM has.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-cp-islands"
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+
+namespace {
+ /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
+ /// requires constant pool entries to be scattered among the instructions
+ /// inside a function. To do this, it completely ignores the normal LLVM
+ /// constant pool; instead, it places constants wherever it feels like with
+ /// special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+ class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass {
+ /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
+ /// by MBB Number. The two-byte pads required for Thumb alignment are
+ /// counted as part of the following block (i.e., the offset and size for
+ /// a padded block will both be ==2 mod 4).
+ std::vector<unsigned> BBSizes;
+
+ /// BBOffsets - the offset of each MBB in bytes, starting from 0.
+ /// The two-byte pads required for Thumb alignment are counted as part of
+ /// the following block.
+ std::vector<unsigned> BBOffsets;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ unsigned MaxDisp;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp) {}
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// CPEntry - One per constant pool entry, keeping the machine instruction
+ /// pointer, the constpool index, and the number of CPUser's which
+ /// reference this entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+ CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+ : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+ };
+
+ /// CPEntries - Keep track of all of the constant pool entry machine
+ /// instructions. For each original constpool index (i.e. those that
+ /// existed upon entry to this pass), it keeps a vector of entries.
+ /// Original elements are cloned as we go along; the clones are
+ /// put in the vector of the original element, but have distinct CPIs.
+ std::vector<std::vector<CPEntry> > CPEntries;
+
+ /// ImmBranch - One per immediate branch, keeping the machine instruction
+ /// pointer, conditional or unconditional, the max displacement,
+ /// and (if isCond is true) the corresponding unconditional branch
+ /// opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned MaxDisp : 31;
+ bool isCond : 1;
+ int UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
+ };
+
+ /// ImmBranches - Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
+ ///
+ SmallVector<MachineInstr*, 4> PushPopMIs;
+
+ /// HasFarJump - True if any far jump instruction has been emitted during
+ /// the branch fix up pass.
+ bool HasFarJump;
+
+ const TargetInstrInfo *TII;
+ ARMFunctionInfo *AFI;
+ bool isThumb;
+ public:
+ static char ID;
+ ARMConstantIslands() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM constant island placement and branch shortening pass";
+ }
+
+ private:
+ void DoInitialPlacement(MachineFunction &Fn,
+ std::vector<MachineInstr*> &CPEMIs);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ void InitialFunctionScan(MachineFunction &Fn,
+ const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
+ void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+ bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
+ int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
+ bool LookForWater(CPUser&U, unsigned UserOffset,
+ MachineBasicBlock** NewMBB);
+ MachineBasicBlock* AcceptWater(MachineBasicBlock *WaterBB,
+ std::vector<MachineBasicBlock*>::iterator IP);
+ void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock** NewMBB);
+ bool HandleConstantPoolUser(MachineFunction &Fn, unsigned CPUserIndex);
+ void RemoveDeadCPEMI(MachineInstr *CPEMI);
+ bool RemoveUnusedCPEntries();
+ bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp,
+ bool DoDump);
+ bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U);
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK);
+ bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br);
+ bool FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br);
+ bool FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br);
+ bool UndoLRSpillRestore();
+
+ unsigned GetOffsetOf(MachineInstr *MI) const;
+ void dumpBBs();
+ void verify(MachineFunction &Fn);
+ };
+ char ARMConstantIslands::ID = 0;
+}
+
+/// verify - check BBOffsets, BBSizes, alignment of islands
+void ARMConstantIslands::verify(MachineFunction &Fn) {
+ assert(BBOffsets.size() == BBSizes.size());
+ for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
+ assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
+ if (isThumb) {
+ for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() &&
+ MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY)
+ assert((BBOffsets[MBB->getNumber()]%4 == 0 &&
+ BBSizes[MBB->getNumber()]%4 == 0) ||
+ (BBOffsets[MBB->getNumber()]%4 != 0 &&
+ BBSizes[MBB->getNumber()]%4 != 0));
+ }
+ }
+}
+
+/// print block size and offset information - debugging
+void ARMConstantIslands::dumpBBs() {
+ for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
+ DOUT << "block " << J << " offset " << BBOffsets[J] <<
+ " size " << BBSizes[J] << "\n";
+ }
+}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
+
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
+ MachineConstantPool &MCP = *Fn.getConstantPool();
+
+ TII = Fn.getTarget().getInstrInfo();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ isThumb = AFI->isThumbFunction();
+
+ HasFarJump = false;
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ Fn.RenumberBlocks();
+
+ /// Thumb functions containing constant pools get 2-byte alignment.
+ /// This is so we can keep exact track of where the alignment padding goes.
+ /// Set default.
+ AFI->setAlign(isThumb ? 1U : 2U);
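+ // setAlign takes the log2 of the alignment: 1 => 2 bytes, 2 => 4 bytes.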
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP.isEmpty()) {
+ DoInitialPlacement(Fn, CPEMIs);
+ if (isThumb)
+ AFI->setAlign(2U);
+ }
+
+ /// The next UID to take is the first unused one.
+ AFI->initConstPoolEntryUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ InitialFunctionScan(Fn, CPEMIs);
+ CPEMIs.clear();
+
+ /// Remove dead constant pool entries.
+ RemoveUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ bool MadeChange = false;
+ while (true) {
+ bool Change = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ Change |= HandleConstantPoolUser(Fn, i);
+ DEBUG(dumpBBs());
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ Change |= FixUpImmediateBr(Fn, ImmBranches[i]);
+ DEBUG(dumpBBs());
+ if (!Change)
+ break;
+ MadeChange = true;
+ }
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify(Fn);
+
+ // If LR has been force-spilled and no far jump (i.e. BL) has been issued,
+ // undo the spill / restore of LR if possible.
+ if (!HasFarJump && AFI->isLRSpilledForFarJump() && isThumb)
+ MadeChange |= UndoLRSpillRestore();
+
+ BBSizes.clear();
+ BBOffsets.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+ PushPopMIs.clear();
+
+ return MadeChange;
+}
+
+/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// entries. To start with, we put them all at the end of the function.
+void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn,
+ std::vector<MachineInstr*> &CPEMIs) {
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = Fn.CreateMachineBasicBlock();
+ Fn.push_back(BB);
+
+ // Add all of the constants from the constant pool to the end block, use an
+ // identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs =
+ Fn.getConstantPool()->getConstants();
+
+ const TargetData &TD = *Fn.getTarget().getTargetData();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ // Verify that all constant pool entries are a multiple of 4 bytes. If not,
+ // we would have to pad them out or something so that instructions stay
+ // aligned.
+ assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ MachineInstr *CPEMI =
+ BuildMI(BB, DebugLoc::getUnknownLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+ CPEMIs.push_back(CPEMI);
+
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ std::vector<CPEntry> CPEs;
+ CPEs.push_back(CPEntry(CPEMI, i));
+ CPEntries.push_back(CPEs);
+ NumCPEs++;
+ DOUT << "Moved CPI#" << i << " to end of function as #" << i << "\n";
+ }
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fall through
+/// into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
+ return false;
+
+ MachineBasicBlock *NextBB = next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+ARMConstantIslands::CPEntry
+*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ if (CPEs[i].CPEMI == CPEMI)
+ return &CPEs[i];
+ }
+ return NULL;
+}
+
+/// InitialFunctionScan - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
+ const std::vector<MachineInstr*> &CPEMIs) {
+ unsigned Offset = 0;
+ for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+ // 'water' where a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+
+ unsigned MBBSize = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ // Add instruction size to MBBSize.
+ MBBSize += TII->GetInstSizeInBytes(I);
+
+ int Opc = I->getOpcode();
+ if (I->getDesc().isBranch()) {
+ bool isCond = false;
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ int UOpc = Opc;
+ switch (Opc) {
+ case ARM::tBR_JTr:
+ // A Thumb table jump may involve padding; for the offsets to
+ // be right, functions containing these must be 4-byte aligned.
+ AFI->setAlign(2U);
+ if ((Offset+MBBSize)%4 != 0)
+ MBBSize += 2; // padding
+ continue; // Does not get an entry in ImmBranches
+ default:
+ continue; // Ignore other JT branches
+ case ARM::Bcc:
+ isCond = true;
+ UOpc = ARM::B;
+ // Fallthrough
+ case ARM::B:
+ Bits = 24;
+ Scale = 4;
+ break;
+ case ARM::tBcc:
+ isCond = true;
+ UOpc = ARM::tB;
+ Bits = 8;
+ Scale = 2;
+ break;
+ case ARM::tB:
+ Bits = 11;
+ Scale = 2;
+ break;
+ }
+
+ // Record this immediate branch.
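+ // The branch displacement is a signed immediate of 'Bits' bits scaled by
+ // 'Scale', so the maximum forward offset is ((1 << (Bits-1)) - 1) * Scale.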
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ }
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
+ PushPopMIs.push_back(I);
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isCPI()) {
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // Basic size info comes from the TSFlags field.
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ unsigned TSFlags = I->getDesc().TSFlags;
+ switch (TSFlags & ARMII::AddrModeMask) {
+ default:
+ // Constant pool entries can reach anything.
+ if (I->getOpcode() == ARM::CONSTPOOL_ENTRY)
+ continue;
+ if (I->getOpcode() == ARM::tLEApcrel) {
+ Bits = 8; // Taking the address of a CP entry.
+ break;
+ }
+ assert(0 && "Unknown addressing mode for CP reference!");
+ case ARMII::AddrMode1: // AM1: 8 bits << 2
+ Bits = 8;
+ Scale = 4; // Taking the address of a CP entry.
+ break;
+ case ARMII::AddrMode2:
+ Bits = 12; // +-offset_12
+ break;
+ case ARMII::AddrMode3:
+ Bits = 8; // +-offset_8
+ break;
+ // addrmode4 has no immediate offset.
+ case ARMII::AddrMode5:
+ Bits = 8;
+ Scale = 4; // +-(offset_8*4)
+ break;
+ case ARMII::AddrModeT1:
+ Bits = 5; // +offset_5
+ break;
+ case ARMII::AddrModeT2:
+ Bits = 5;
+ Scale = 2; // +(offset_5*2)
+ break;
+ case ARMII::AddrModeT4:
+ Bits = 5;
+ Scale = 4; // +(offset_5*4)
+ break;
+ case ARMII::AddrModeTs:
+ Bits = 8;
+ Scale = 4; // +(offset_8*4)
+ break;
+ }
+
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = I->getOperand(op).getIndex();
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ unsigned MaxOffs = ((1 << Bits)-1) * Scale;
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry, don't bother scanning the
+ // rest of the operands.
+ break;
+ }
+ }
+
+ // In Thumb mode, if this block is a constpool island, we may need padding
+ // so it's aligned on a 4-byte boundary.
+ if (isThumb &&
+ !MBB.empty() &&
+ MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ (Offset%4) != 0)
+ MBBSize += 2;
+
+ BBSizes.push_back(MBBSize);
+ BBOffsets.push_back(Offset);
+ Offset += MBBSize;
+ }
+}
+
+/// GetOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBOffsets[MBB->getNumber()];
+
+ // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
+ // alignment padding, and compensate if so.
+ if (isThumb &&
+ MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ Offset%4 != 0)
+ Offset += 2;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ if (&*I == MI) return Offset;
+ Offset += TII->GetInstSizeInBytes(I);
+ }
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+ // Renumber the MBBs to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+ // Next, update WaterList. Specifically, we need to add NewMBB as having
+ // available water after it.
+ std::vector<MachineBasicBlock*>::iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change, and return the newly created block.
+MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+ MachineFunction &MF = *OrigBB->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ BuildMI(OrigBB, DebugLoc::getUnknownLoc(),
+ TII->get(isThumb ? ARM::tB : ARM::B)).addMBB(NewBB);
+ NumSplit++;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ while (!OrigBB->succ_empty()) {
+ MachineBasicBlock *Succ = *OrigBB->succ_begin();
+ OrigBB->removeSuccessor(Succ);
+ NewBB->addSuccessor(Succ);
+
+ // This pass should be run after register allocation, so there should be no
+ // PHI nodes to update.
+ assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI)
+ && "PHI nodes should be eliminated by now!");
+ }
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as UpdateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+ // Next, update WaterList. Specifically, we need to add OrigMBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ std::vector<MachineBasicBlock*>::iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+ CompareMBBNumbers);
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+
+ // Figure out how large NewBB is. (It cannot contain a constpool_entry or
+ // tablejump.)
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += TII->GetInstSizeInBytes(I);
+
+ unsigned OrigBBI = OrigBB->getNumber();
+ unsigned NewBBI = NewBB->getNumber();
+ // Set the size of NewBB in BBSizes.
+ BBSizes[NewBBI] = NewBBSize;
+
+ // We removed instructions from OrigBB; subtract that off from its size.
+ // Add 2 or 4 to the block to count the unconditional branch we added to it.
+ unsigned delta = isThumb ? 2 : 4;
+ BBSizes[OrigBBI] -= NewBBSize - delta;
+
+ // ...and adjust BBOffsets for NewBB accordingly.
+ BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+
+ // All BBOffsets following these blocks must be modified.
+ AdjustBBOffsetsAfter(NewBB, delta);
+
+ return NewBB;
+}
+
+/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool
+/// reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
+bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
+ unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK) {
+ // On Thumb offsets==2 mod 4 are rounded down by the hardware for
+ // purposes of the displacement computation; compensate for that here.
+ // Effectively, the valid range of displacements is 2 bytes smaller for such
+ // references.
+ if (isThumb && UserOffset%4 !=0)
+ UserOffset -= 2;
+ // CPEs will be rounded up to a multiple of 4.
+ if (isThumb && TrialOffset%4 != 0)
+ TrialOffset += 2;
+
+ if (UserOffset <= TrialOffset) {
+ // User before the Trial.
+ if (TrialOffset-UserOffset <= MaxDisp)
+ return true;
+ } else if (NegativeOK) {
+ if (UserOffset-TrialOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// WaterIsInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specified MI.
+
+bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U)
+{
+ unsigned MaxDisp = U.MaxDisp;
+ MachineFunction::iterator I = next(MachineFunction::iterator(Water));
+ unsigned CPEOffset = BBOffsets[Water->getNumber()] +
+ BBSizes[Water->getNumber()];
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. (Currently applies only to ARM, so
+ // no alignment compensation attempted here.)
+ if (CPEOffset < UserOffset)
+ UserOffset += U.CPEMI->getOperand(2).getImm();
+
+ return OffsetIsInRange (UserOffset, CPEOffset, MaxDisp, !isThumb);
+}
+
+/// CPEIsInRange - Returns true if the distance between the specified MI and
+/// the specified constant pool entry instruction fits in MI's displacement field.
+bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI,
+ unsigned MaxDisp, bool DoDump) {
+ unsigned CPEOffset = GetOffsetOf(CPEMI);
+ assert(CPEOffset%4 == 0 && "Misaligned CPE");
+
+ if (DoDump) {
+ DOUT << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << " insn address=" << UserOffset
+ << " CPE address=" << CPEOffset
+ << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI;
+ }
+
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, !isThumb);
+}
+
+#ifndef NDEBUG
+/// BBIsJumpedOver - Return true if the specified basic block's only predecessor
+/// unconditionally branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif // NDEBUG
+
+void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
+ int delta) {
+ MachineFunction::iterator MBBI = BB; MBBI = next(MBBI);
+ for(unsigned i=BB->getNumber()+1; i<BB->getParent()->getNumBlockIDs(); i++) {
+ BBOffsets[i] += delta;
+ // If some existing blocks have padding, adjust it as needed; this is a bit
+ // tricky because delta can be negative, so don't use % on it.
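+ // (For example, a CONSTPOOL_ENTRY block whose start moves from a multiple of
+ // 4 to a 2 mod 4 offset now needs 2 bytes of padding, and vice versa.)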
+ if (isThumb) {
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty()) {
+ // Constant pool entries require padding.
+ if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned oldOffset = BBOffsets[i] - delta;
+ if (oldOffset%4==0 && BBOffsets[i]%4!=0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ } else if (oldOffset%4!=0 && BBOffsets[i]%4==0) {
+ // remove existing padding
+ BBSizes[i] -=2;
+ delta -= 2;
+ }
+ }
+ // Thumb jump tables require padding. They should be at the end;
+ // following unconditional branches are removed by AnalyzeBranch.
+ MachineInstr *ThumbJTMI = NULL;
+ if (prior(MBB->end())->getOpcode() == ARM::tBR_JTr)
+ ThumbJTMI = prior(MBB->end());
+ if (ThumbJTMI) {
+ unsigned newMIOffset = GetOffsetOf(ThumbJTMI);
+ unsigned oldMIOffset = newMIOffset - delta;
+ if (oldMIOffset%4 == 0 && newMIOffset%4 != 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ } else if (oldMIOffset%4 != 0 && newMIOffset%4 == 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ }
+ }
+ if (delta==0)
+ return;
+ }
+ MBBI = next(MBBI);
+ }
+ }
+}
+
+/// DecrementOldEntry - find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount. If the refcount
+/// becomes 0 remove the entry and instruction. Returns true if we removed
+/// the entry, false if we didn't.
+
+bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ RemoveDeadCPEMI(CPEMI);
+ CPE->CPEMI = NULL;
+ NumCPEs--;
+ return true;
+ }
+ return false;
+}
+
+/// LookForExistingCPEntry - See if the currently referenced CPE is in range;
+/// if not, see if an in-range clone of the CPE is in range, and if so,
+/// change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
+{
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, true)) {
+ DOUT << "In range\n";
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, false)) {
+ DOUT << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n";
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specified unconditional branch instruction.
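+/// (tB encodes an 11-bit signed halfword offset, roughly +/-2KB; ARM's B
+/// encodes a 24-bit signed word offset, roughly +/-32MB.)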
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+ return (Opc == ARM::tB) ? ((1<<10)-1)*2 : ((1<<23)-1)*4;
+}
+
+/// AcceptWater - Small amount of common code factored out of the following.
+
+MachineBasicBlock* ARMConstantIslands::AcceptWater(MachineBasicBlock *WaterBB,
+ std::vector<MachineBasicBlock*>::iterator IP) {
+ DOUT << "found water in range\n";
+ // Remove the original WaterList entry; we want subsequent
+ // insertions in this vicinity to go after the one we're
+ // about to insert. This considerably reduces the number
+ // of times we have to move the same CPE more than once.
+ WaterList.erase(IP);
+ // CPE goes before following block (NewMBB).
+ return next(MachineFunction::iterator(WaterBB));
+}
+
+/// LookForWater - Look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not. If it returns true, *NewMBB
+/// is set to the block following the chosen WaterList entry (the CPE is
+/// inserted before *NewMBB).
+/// For ARM, we prefer the water that's farthest away. For Thumb, prefer
+/// water that will not introduce padding over water that will; within each
+/// group, prefer the water that's farthest away.
+
+bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
+ MachineBasicBlock** NewMBB) {
+ std::vector<MachineBasicBlock*>::iterator IPThatWouldPad;
+ MachineBasicBlock* WaterBBThatWouldPad = NULL;
+ if (!WaterList.empty()) {
+ for (std::vector<MachineBasicBlock*>::iterator IP = prior(WaterList.end()),
+ B = WaterList.begin();; --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterIsInRange(UserOffset, WaterBB, U)) {
+ if (isThumb &&
+ (BBOffsets[WaterBB->getNumber()] +
+ BBSizes[WaterBB->getNumber()])%4 != 0) {
+ // This is valid Water, but would introduce padding. Remember
+ // it in case we don't find any Water that doesn't do this.
+ if (!WaterBBThatWouldPad) {
+ WaterBBThatWouldPad = WaterBB;
+ IPThatWouldPad = IP;
+ }
+ } else {
+ *NewMBB = AcceptWater(WaterBB, IP);
+ return true;
+ }
+ }
+ if (IP == B)
+ break;
+ }
+ }
+ if (isThumb && WaterBBThatWouldPad) {
+ *NewMBB = AcceptWater(WaterBBThatWouldPad, IPThatWouldPad);
+ return true;
+ }
+ return false;
+}
+
+/// CreateNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct. Otherwise the block is split to create a hole with an
+/// unconditional branch around it. In either case *NewMBB is set to a
+/// block following which the new island can be inserted (the WaterList
+/// is not adjusted).
+
+void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
+ unsigned UserOffset, MachineBasicBlock** NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
+ BBSizes[UserMBB->getNumber()];
+ assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);
+
+ // If the use is at the end of the block, or the end of the block
+ // is within range, make new water there. (The addition below is
+ // for the unconditional branch we will be adding: 4 bytes on ARM,
+ // 2 on Thumb. Possible Thumb alignment padding is allowed for
+ // inside OffsetIsInRange.
+ // If the block ends in an unconditional branch already, it is water,
+ // and is known to be out of range, so we'll always be adding a branch.)
+ if (&UserMBB->back() == UserMI ||
+ OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb ? 2: 4),
+ U.MaxDisp, !isThumb)) {
+ DOUT << "Split at end of block\n";
+ if (&UserMBB->back() == UserMI)
+ assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
+ *NewMBB = next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block.
+ // Record it for branch lengthening; this new branch will not get out of
+ // range, but if the preceding conditional branch is out of range, the
+ // targets will be exchanged, and the altered branch may be out of
+ // range, so the machinery has to know about it.
+ int UncondBr = isThumb ? ARM::tB : ARM::B;
+ BuildMI(UserMBB, DebugLoc::getUnknownLoc(),
+ TII->get(UncondBr)).addMBB(*NewMBB);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ int delta = isThumb ? 2 : 4;
+ BBSizes[UserMBB->getNumber()] += delta;
+ AdjustBBOffsetsAfter(UserMBB, delta);
+ } else {
+ // What a big block. Find a place within the block to split it.
+ // This is a little tricky on Thumb since instructions are 2 bytes
+ // and constant pool entries are 4 bytes: if instruction I references
+ // island CPE, and instruction I+1 references CPE', it will
+ // not work well to put CPE as far forward as possible, since then
+ // CPE' cannot immediately follow it (that location is 2 bytes
+ // farther away from I+1 than CPE was from I) and we'd need to create
+ // a new island. So, we make a first guess, then walk through the
+ // instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions
+ // that reference CPEs will be able to use the same island area;
+ // if not, we back up the insertion point.
+
+ // The 4 in the following is for the unconditional branch we'll be
+ // inserting (allows for long branch on Thumb). Alignment of the
+ // island is handled inside OffsetIsInRange.
+ unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
+ // This could point off the end of the block if we've already got
+ // constant pool entries following this block; only the last one is
+ // in the water list. Back past any possible branches (allow for a
+ // conditional and a maximally long unconditional).
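+ // (On Thumb that is 2 + 4 = 6 bytes; on ARM, 4 + 4 = 8 bytes.)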
+ if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
+ BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
+ (isThumb ? 6 : 8);
+ unsigned EndInsertOffset = BaseInsertOffset +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = next(MI)) {
+ if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
+ if (!OffsetIsInRange(Offset, EndInsertOffset,
+ CPUsers[CPUIndex].MaxDisp, !isThumb)) {
+ BaseInsertOffset -= (isThumb ? 2 : 4);
+ EndInsertOffset -= (isThumb ? 2 : 4);
+ }
+ // This is overly conservative, as we don't account for CPEMIs
+ // being reused within the block, but it doesn't matter much.
+ EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+ }
+ DOUT << "Split in middle of big block\n";
+ *NewMBB = SplitBlockBeforeInstr(prior(MI));
+ }
+}
+
+/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it some
+/// place in-range. Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn,
+ unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ MachineBasicBlock *NewMBB;
+ // Compute this only once, it's expensive. The 4 or 8 is the value the
+ // hardware keeps in the PC (2 insns ahead of the reference).
+ unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
+
+ // Special case: tLEApcrel expands to a two-instruction sequence; the actual
+ // user is the second instruction.
+ if (UserMI->getOpcode() == ARM::tLEApcrel)
+ UserOffset += 2;
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
+ int result = LookForExistingCPEntry(U, UserOffset);
+ if (result==1) return false;
+ else if (result==2) return true;
+
+ // No existing clone of this CPE is within range.
+ // We will be generating a new clone. Get a UID for it.
+ unsigned ID = AFI->createConstPoolEntryUId();
+
+ // Look for water where we can place this CPE. We look for the farthest one
+ // away that will work. Forward references only for now (although later
+ // we might find some that are backwards).
+
+ if (!LookForWater(U, UserOffset, &NewMBB)) {
+ // No water found.
+ DOUT << "No water found\n";
+ CreateNewWater(CPUserIndex, UserOffset, &NewMBB);
+ }
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MachineBasicBlock *NewIsland = Fn.CreateMachineBasicBlock();
+ Fn.insert(NewMBB, NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ UpdateForInsertedWaterBlock(NewIsland);
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ DecrementOldEntry(CPI, CPEMI);
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ U.CPEMI = BuildMI(NewIsland, DebugLoc::getUnknownLoc(),
+ TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ NumCPEs++;
+
+ BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
+ // Compensate for .align 2 in thumb mode.
+ if (isThumb && BBOffsets[NewIsland->getNumber()]%4 != 0)
+ Size += 2;
+ // Increase the size of the island block to account for the new entry.
+ BBSizes[NewIsland->getNumber()] += Size;
+ AdjustBBOffsetsAfter(NewIsland, Size);
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isCPI()) {
+ UserMI->getOperand(i).setIndex(ID);
+ break;
+ }
+
+ DOUT << " Moved CPE to #" << ID << " CPI=" << CPI << "\t" << *UserMI;
+
+ return true;
+}
+
+/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// sizes and offsets of impacted basic blocks.
+void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBSizes[CPEBB->getNumber()] -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+ // In Thumb mode, the island's size may have been padded by two to satisfy
+ // the alignment requirement, so it can still be 2 even though the block is
+ // now empty; fix this.
+ if (BBSizes[CPEBB->getNumber()] != 0) {
+ Size += BBSizes[CPEBB->getNumber()];
+ BBSizes[CPEBB->getNumber()] = 0;
+ }
+ }
+ AdjustBBOffsetsAfter(CPEBB, -Size);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool ARMConstantIslands::RemoveUnusedCPEntries() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ RemoveDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = NULL;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// BBIsInRange - Returns true if the distance between the specified MI and
+/// the specified BB fits in MI's displacement field.
+bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
+ unsigned MaxDisp) {
+ unsigned PCAdj = isThumb ? 4 : 8;
+ unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+
+ DOUT << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " max delta=" << MaxDisp
+ << " from " << GetOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI;
+
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset-BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset-DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
+bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Check to see if the DestBB is already in-range.
+ if (BBIsInRange(MI, DestBB, Br.MaxDisp))
+ return false;
+
+ if (!Br.isCond)
+ return FixUpUnconditionalBr(Fn, Br);
+ return FixUpConditionalBr(Fn, Br);
+}
+
+/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field. If the LR register has been
+/// spilled in the epilogue, then we can use BL to implement a far jump.
+/// Otherwise, an intermediate branch would have to be inserted.
+bool
+ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ assert(isThumb && "Expected a Thumb function!");
+
+ // Use BL to implement far jump.
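+ // (BL's offset field covers roughly +/-4MB, hence the (1 << 21) * 2 below.)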
+ Br.MaxDisp = (1 << 21) * 2;
+ MI->setDesc(TII->get(ARM::tBfar));
+ BBSizes[MBB->getNumber()] += 2;
+ AdjustBBOffsetsAfter(MBB, 2);
+ HasFarJump = true;
+ NumUBrFixed++;
+
+ DOUT << " Changed B to long jump " << *MI;
+
+ return true;
+}
+
+/// FixUpConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool
+ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // blt L1
+ // =>
+ // bge L2
+ // b L1
+ // L2:
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
+ CC = ARMCC::getOppositeCondition(CC);
+ unsigned CCReg = MI->getOperand(2).getReg();
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through block. Otherwise,
+ // split the MBB before the next instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ NumCBrFixed++;
+ if (BMI != MI) {
+ if (next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == Br.UncondBr) {
+ // Last MI in the BB is an unconditional branch. Can we simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
+ DOUT << " Invert Bcc condition and swap its destination with " << *BMI;
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(0).setMBB(NewDest);
+ MI->getOperand(1).setImm(CC);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ SplitBlockBeforeInstr(MI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ BBSizes[MBB->getNumber()] -= delta;
+ MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB));
+ AdjustBBOffsetsAfter(SplitBB, -delta);
+ MBB->back().eraseFromParent();
+ // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ }
+ MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+
+ DOUT << " Insert B to BB#" << DestBB->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n";
+
+ // Insert a new conditional branch and a new unconditional branch.
+ // Also update the ImmBranch as well as adding a new entry for the new branch.
+ BuildMI(MBB, DebugLoc::getUnknownLoc(),
+ TII->get(MI->getOpcode()))
+ .addMBB(NextBB).addImm(CC).addReg(CCReg);
+ Br.MI = &MBB->back();
+ BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BuildMI(MBB, DebugLoc::getUnknownLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
+ ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
+
+ // Remove the old conditional branch. It may or may not still be in MBB.
+ BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
+
+ // The net size change is an addition of one unconditional branch.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ AdjustBBOffsetsAfter(MBB, delta);
+ return true;
+}
+
+/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spill
+/// LR / restore LR to pc.
+bool ARMConstantIslands::UndoLRSpillRestore() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
+ MachineInstr *MI = PushPopMIs[i];
+ if (MI->getOpcode() == ARM::tPOP_RET &&
+ MI->getOperand(0).getReg() == ARM::PC &&
+ MI->getNumExplicitOperands() == 1) {
+ BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET));
+ MI->eraseFromParent();
+ MadeChange = true;
+ }
+ }
+ return MadeChange;
+}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
new file mode 100644
index 000000000000..3a038c9a8c37
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -0,0 +1,100 @@
+//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Type.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include <ostream>
+using namespace llvm;
+
+ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id,
+ ARMCP::ARMCPKind k,
+ unsigned char PCAdj,
+ const char *Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)gv->getType()),
+ GV(gv), S(NULL), LabelId(id), Kind(k), PCAdjust(PCAdj),
+ Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(const char *s, unsigned id,
+ ARMCP::ARMCPKind k,
+ unsigned char PCAdj,
+ const char *Modif,
+ bool AddCA)
+ : MachineConstantPoolValue((const Type*)Type::Int32Ty),
+ GV(NULL), S(s), LabelId(id), Kind(k), PCAdjust(PCAdj),
+ Modifier(Modif), AddCurrentAddress(AddCA) {}
+
+ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv,
+ ARMCP::ARMCPKind k,
+ const char *Modif)
+ : MachineConstantPoolValue((const Type*)Type::Int32Ty),
+ GV(gv), S(NULL), LabelId(0), Kind(k), PCAdjust(0),
+ Modifier(Modif), AddCurrentAddress(false) {}
+
+int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ if (CPV->GV == GV &&
+ CPV->S == S &&
+ CPV->LabelId == LabelId &&
+ CPV->Kind == Kind &&
+ CPV->PCAdjust == PCAdjust)
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+void
+ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(GV);
+ ID.AddPointer(S);
+ ID.AddInteger(LabelId);
+ ID.AddInteger((unsigned)Kind);
+ ID.AddInteger(PCAdjust);
+}
+
+void ARMConstantPoolValue::dump() const {
+ cerr << " " << *this;
+}
+
+void ARMConstantPoolValue::print(std::ostream &O) const {
+ raw_os_ostream RawOS(O);
+ print(RawOS);
+}
+
+void ARMConstantPoolValue::print(raw_ostream &O) const {
+ if (GV)
+ O << GV->getName();
+ else
+ O << S;
+ if (isNonLazyPointer()) O << "$non_lazy_ptr";
+ else if (isStub()) O << "$stub";
+ if (Modifier) O << "(" << Modifier << ")";
+ if (PCAdjust != 0) {
+ O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust;
+ if (AddCurrentAddress) O << "-.";
+ O << ")";
+ }
+}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
new file mode 100644
index 000000000000..d2b9066dcc97
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -0,0 +1,92 @@
+//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include <iosfwd>
+
+namespace llvm {
+
+class GlobalValue;
+
+namespace ARMCP {
+ enum ARMCPKind {
+ CPValue,
+ CPNonLazyPtr,
+ CPStub
+ };
+}
+
+/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
+/// represent PC relative displacement between the address of the load
+/// instruction and the global value being loaded, i.e. (&GV-(LPIC+8)).
+class ARMConstantPoolValue : public MachineConstantPoolValue {
+ GlobalValue *GV; // GlobalValue being loaded.
+ const char *S; // ExtSymbol being loaded.
+ unsigned LabelId; // Label id of the load.
+ ARMCP::ARMCPKind Kind; // non_lazy_ptr or stub?
+ unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative.
+ // 8 for ARM, 4 for Thumb.
+ const char *Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8))
+ bool AddCurrentAddress;
+
+public:
+ ARMConstantPoolValue(GlobalValue *gv, unsigned id,
+ ARMCP::ARMCPKind Kind = ARMCP::CPValue,
+ unsigned char PCAdj = 0, const char *Modifier = NULL,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(const char *s, unsigned id,
+ ARMCP::ARMCPKind Kind = ARMCP::CPValue,
+ unsigned char PCAdj = 0, const char *Modifier = NULL,
+ bool AddCurrentAddress = false);
+ ARMConstantPoolValue(GlobalValue *GV, ARMCP::ARMCPKind Kind,
+ const char *Modifier);
+
+
+ GlobalValue *getGV() const { return GV; }
+ const char *getSymbol() const { return S; }
+ const char *getModifier() const { return Modifier; }
+ bool hasModifier() const { return Modifier != NULL; }
+ bool mustAddCurrentAddress() const { return AddCurrentAddress; }
+ unsigned getLabelId() const { return LabelId; }
+ bool isNonLazyPointer() const { return Kind == ARMCP::CPNonLazyPtr; }
+ bool isStub() const { return Kind == ARMCP::CPStub; }
+ unsigned char getPCAdjustment() const { return PCAdjust; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ void print(std::ostream *O) const { if (O) print(*O); }
+ void print(std::ostream &O) const;
+ void print(raw_ostream *O) const { if (O) print(*O); }
+ void print(raw_ostream &O) const;
+ void dump() const;
+};
+
+inline std::ostream &operator<<(std::ostream &O, const ARMConstantPoolValue &V) {
+ V.print(O);
+ return O;
+}
+
+inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
+ V.print(O);
+ return O;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/ARM/ARMFrameInfo.h b/lib/Target/ARM/ARMFrameInfo.h
new file mode 100644
index 000000000000..405b8f2b9f28
--- /dev/null
+++ b/lib/Target/ARM/ARMFrameInfo.h
@@ -0,0 +1,32 @@
+//===-- ARMFrameInfo.h - Define TargetFrameInfo for ARM ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines ARMFrameInfo, the ARM-specific subclass of TargetFrameInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_FRAMEINFO_H
+#define ARM_FRAMEINFO_H
+
+#include "ARM.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "ARMSubtarget.h"
+
+namespace llvm {
+
+class ARMFrameInfo : public TargetFrameInfo {
+public:
+ explicit ARMFrameInfo(const ARMSubtarget &ST)
+ : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) {
+ }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
new file mode 100644
index 000000000000..ca3a9cb40329
--- /dev/null
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -0,0 +1,911 @@
+//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===--------------------------------------------------------------------===//
+/// ARMDAGToDAGISel - ARM specific code to select ARM machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class ARMDAGToDAGISel : public SelectionDAGISel {
+ ARMTargetMachine &TM;
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+public:
+ explicit ARMDAGToDAGISel(ARMTargetMachine &tm)
+ : SelectionDAGISel(tm), TM(tm),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Instruction Selection";
+ }
+
+ SDNode *Select(SDValue Op);
+ virtual void InstructionSelect();
+ bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2Offset(SDValue Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode3(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode3Offset(SDValue Op, SDValue N,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Offset);
+
+ bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset,
+ SDValue &Label);
+
+ bool SelectThumbAddrModeRR(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Offset);
+ bool SelectThumbAddrModeRI5(SDValue Op, SDValue N, unsigned Scale,
+ SDValue &Base, SDValue &OffImm,
+ SDValue &Offset);
+ bool SelectThumbAddrModeS1(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &OffImm, SDValue &Offset);
+ bool SelectThumbAddrModeS2(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &OffImm, SDValue &Offset);
+ bool SelectThumbAddrModeS4(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &OffImm, SDValue &Offset);
+ bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &OffImm);
+
+ bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A,
+ SDValue &B, SDValue &C);
+
+ // Include the pieces autogenerated from the target description.
+#include "ARMGenDAGISel.inc"
+
+private:
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+};
+}
+
+void ARMDAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
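+ // For example, an address computed as X*5 can be matched as base X plus
+ // offset X, lsl #2 (i.e. X + 4*X), avoiding an explicit multiply.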
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return true;
+ }
+ }
+ }
+ }
+
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+
+ // Match simple R +/- imm12 operands.
+ if (N.getOpcode() == ISD::ADD)
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if ((RHSC >= 0 && RHSC < 0x1000) ||
+ (RHSC < 0 && RHSC > -0x1000)) { // 12 bits.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R
+ ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ Offset = N.getOperand(1).getOperand(0);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDValue Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op.getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ int Val = (int)C->getZExtValue();
+ if (Val >= 0 && Val < 0x1000) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
+ }
+
+ Offset = N;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ unsigned ShAmt = 0;
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't fold
+ // it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ Offset = N.getOperand(0);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::SUB) {
+ // X - C is canonicalized to X + -C; no need to handle it here.
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::ADD) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if ((RHSC >= 0 && RHSC < 256) ||
+ (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
+ return true;
+ }
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDValue Op, SDValue N,
+ SDValue &Offset, SDValue &Opc) {
+ unsigned Opcode = Op.getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ int Val = (int)C->getZExtValue();
+ if (Val >= 0 && Val < 256) {
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+ return true;
+ }
+ }
+
+ Offset = N;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
+ return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Offset) {
+ if (N.getOpcode() != ISD::ADD) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
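+ // (The immediate is a word count, so addressing mode 5 reaches +/-1020 bytes.)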
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if ((RHSC & 3) == 0) { // The constant is implicitly multiplied by 4.
+ RHSC >>= 2;
+ if ((RHSC >= 0 && RHSC < 256) ||
+ (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed.
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = - RHSC;
+ }
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+ MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ Base = N;
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N,
+ SDValue &Offset, SDValue &Label) {
+ if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
+ Offset = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ MVT::i32);
+ return true;
+ }
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Offset){
+ // FIXME dl should come from the parent load or store, not the address
+ DebugLoc dl = Op.getDebugLoc();
+ if (N.getOpcode() != ISD::ADD) {
+ Base = N;
+ // We must materialize a zero in a reg! Returning a constant here
+ // wouldn't work without additional code to position the node within
+ // ISel's topological ordering in a place where ISel will process it
+ // normally. Instead, just explicitly issue a tMOVi8 node!
+ Offset = SDValue(CurDAG->getTargetNode(ARM::tMOVi8, dl, MVT::i32,
+ CurDAG->getTargetConstant(0, MVT::i32)), 0);
+ return true;
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
+}
+
+bool
+ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N,
+ unsigned Scale, SDValue &Base,
+ SDValue &OffImm, SDValue &Offset) {
+ if (Scale == 4) {
+ SDValue TmpBase, TmpOffImm;
+ if (SelectThumbAddrModeSP(Op, N, TmpBase, TmpOffImm))
+ return false; // We want to select tLDRspi / tSTRspi instead.
+ if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return false; // We want to select tLDRpci instead.
+ }
+
+ if (N.getOpcode() != ISD::ADD) {
+ Base = (N.getOpcode() == ARMISD::Wrapper) ? N.getOperand(0) : N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // Thumb does not have [sp, r] address mode.
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
+ if ((LHSR && LHSR->getReg() == ARM::SP) ||
+ (RHSR && RHSR->getReg() == ARM::SP)) {
+ Base = N;
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // If the RHS is + imm5 * scale, fold into addr mode.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if ((RHSC & (Scale-1)) == 0) { // The constant is implicitly multiplied.
+ RHSC /= Scale;
+ if (RHSC >= 0 && RHSC < 32) {
+ Base = N.getOperand(0);
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &OffImm,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI5(Op, N, 1, Base, OffImm, Offset);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &OffImm,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI5(Op, N, 2, Base, OffImm, Offset);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &OffImm,
+ SDValue &Offset) {
+ return SelectThumbAddrModeRI5(Op, N, 4, Base, OffImm, Offset);
+}
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::ADD)
+ return false;
+
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
+ if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
+ (LHSR && LHSR->getReg() == ARM::SP)) {
+ // If the RHS is + imm8 * scale, fold into addr mode.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if ((RHSC & 3) == 0) { // The constant is implicitly multiplied.
+ RHSC >>= 2;
+ if (RHSC >= 0 && RHSC < 256) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
+ SDValue N,
+ SDValue &BaseReg,
+ SDValue &ShReg,
+ SDValue &Opc) {
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match the base-register-only case; that is matched by a separate,
+ // lower-complexity pattern with an explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShReg = CurDAG->getRegister(0, MVT::i32);
+ ShImmVal = RHS->getZExtValue() & 31;
+ } else {
+ ShReg = N.getOperand(1);
+ }
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
+ return true;
+}
+
+/// getAL - Returns an ARMCC::AL immediate node.
+static inline SDValue getAL(SelectionDAG *CurDAG) {
+ return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
+}
+
+
+SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ bool UseCP = true;
+ if (Subtarget->isThumb())
+ UseCP = (Val > 255 && // MOV
+ ~Val > 255 && // MOV + MVN
+ !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL
+ else
+ UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
+ ARM_AM::getSOImmVal(~Val) == -1 && // MVN
+ !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs.
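+ // e.g. on Thumb, 0xFF00 can be built as MOV #0xFF followed by a shift, but a
+ // value such as 0xABCD fails all three checks and is loaded from the pool.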
+ if (UseCP) {
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val),
+ TLI.getPointerTy());
+
+ SDNode *ResNode;
+ if (Subtarget->isThumb())
+ ResNode = CurDAG->getTargetNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other,
+ CPIdx, CurDAG->getEntryNode());
+ else {
+ SDValue Ops[] = {
+ CPIdx,
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getEntryNode()
+ };
+ ResNode=CurDAG->getTargetNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
+ Ops, 6);
+ }
+ ReplaceUses(Op, SDValue(ResNode, 0));
+ return NULL;
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::FrameIndex: {
+ // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ if (Subtarget->isThumb()) {
+ return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
+ } else {
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, ARM::ADDri, MVT::i32, Ops, 5);
+ }
+ }
+ case ISD::ADD: {
+ if (!Subtarget->isThumb())
+ break;
+ // Select add sp, c to tADDhirr.
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(Op.getOperand(0));
+ RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(Op.getOperand(1));
+ if (LHSR && LHSR->getReg() == ARM::SP) {
+ std::swap(N0, N1);
+ std::swap(LHSR, RHSR);
+ }
+ if (RHSR && RHSR->getReg() == ARM::SP) {
+ SDValue Val = SDValue(CurDAG->getTargetNode(ARM::tMOVlor2hir, dl,
+ Op.getValueType(), N0, N0), 0);
+ return CurDAG->SelectNodeTo(N, ARM::tADDhirr, Op.getValueType(), Val, N1);
+ }
+ break;
+ }
+ case ISD::MUL:
+ if (Subtarget->isThumb())
+ break;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RHSV = C->getZExtValue();
+ if (!RHSV) break;
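+ // Fold a multiply by 2^n+1 or 2^n-1 into a shifted add or reverse-subtract,
+ // e.g. X*9 => X + (X << 3) and X*7 => (X << 3) - X.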
+ if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
+ SDValue V = Op.getOperand(0);
+ unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV-1));
+ SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getTargetConstant(ShImm, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7);
+ }
+ if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
+ SDValue V = Op.getOperand(0);
+ unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV+1));
+ SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getTargetConstant(ShImm, MVT::i32),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
+ }
+ }
+ break;
+ case ARMISD::FMRRD:
+ return CurDAG->getTargetNode(ARM::FMRRD, dl, MVT::i32, MVT::i32,
+ Op.getOperand(0), getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32));
+ case ISD::UMUL_LOHI: {
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getTargetNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ case ISD::SMUL_LOHI: {
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5);
+ }
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ MVT LoadedVT = LD->getMemoryVT();
+ if (AM != ISD::UNINDEXED) {
+ SDValue Offset, AMOpc;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
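+ // i32 and zero-extending i8 loads use addressing mode 2 (12-bit offset);
+ // i16 and sign-extending i8 loads must use addressing mode 3 (8-bit offset).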
+ if (LoadedVT == MVT::i32 &&
+ SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST;
+ Match = true;
+ } else if (LoadedVT == MVT::i16 &&
+ SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
+ ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
+ : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
+ } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
+ if (LD->getExtensionType() == ISD::SEXTLOAD) {
+ if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
+ }
+ } else {
+ if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST;
+ }
+ }
+ }
+
+ if (Match) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
+ CurDAG->getRegister(0, MVT::i32), Chain };
+ return CurDAG->getTargetNode(Opcode, dl, MVT::i32, MVT::i32,
+ MVT::Other, Ops, 6);
+ }
+ }
+ // Other cases are autogenerated.
+ break;
+ }
+ case ARMISD::BRCOND: {
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+ // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+
+ unsigned Opc = Subtarget->isThumb() ? ARM::tBcc : ARM::Bcc;
+ SDValue Chain = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ SDValue N3 = Op.getOperand(3);
+ SDValue InFlag = Op.getOperand(4);
+ assert(N1.getOpcode() == ISD::BasicBlock);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
+ SDNode *ResNode = CurDAG->getTargetNode(Opc, dl, MVT::Other,
+ MVT::Flag, Ops, 5);
+ Chain = SDValue(ResNode, 0);
+ if (Op.getNode()->getNumValues() == 2) {
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(Op.getNode(), 1), InFlag);
+ }
+ ReplaceUses(SDValue(Op.getNode(), 0), SDValue(Chain.getNode(), Chain.getResNo()));
+ return NULL;
+ }
+ case ARMISD::CMOV: {
+ bool isThumb = Subtarget->isThumb();
+ MVT VT = Op.getValueType();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ SDValue N3 = Op.getOperand(3);
+ SDValue InFlag = Op.getOperand(4);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 18 cost = 1 size = 0
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (!isThumb && VT == MVT::i32 &&
+ SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) {
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 7);
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false,
+ // (imm:i32)<<P:Predicate_so_imm>><<X:so_imm_XFORM>>:$true,
+ // (imm:i32):$cc)
+ // Emits: (MOVCCi:i32 GPR:i32:$false,
+ // (so_imm_XFORM:i32 (imm:i32):$true), (imm:i32):$cc)
+ // Pattern complexity = 10 cost = 1 size = 0
+ if (VT == MVT::i32 &&
+ N1.getOpcode() == ISD::Constant &&
+ Predicate_so_imm(N1.getNode())) {
+ SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N1)->getZExtValue()),
+ MVT::i32);
+ Tmp1 = Transform_so_imm_XFORM(Tmp1.getNode());
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag };
+ return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCi, MVT::i32, Ops, 5);
+ }
+
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 1 size = 0
+ //
+ // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
+ // Pattern complexity = 6 cost = 11 size = 0
+ //
+ // Also FCPYScc and FCPYDcc.
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag };
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT()) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::i32:
+ Opc = isThumb ? ARM::tMOVCCr : ARM::MOVCCr;
+ break;
+ case MVT::f32:
+ Opc = ARM::FCPYScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::FCPYDcc;
+ break;
+ }
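+ // For example, an i32 (ARMcmov %false, %true, ARMCC::EQ) with two plain GPR
+ // operands selects (in ARM mode) to MOVCCr here: $false is tied to the
+ // destination and the predicated move overwrites it with $true only when
+ // the EQ condition holds.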
+ return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
+ }
+ case ARMISD::CNEG: {
+ MVT VT = Op.getValueType();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ SDValue N3 = Op.getOperand(3);
+ SDValue InFlag = Op.getOperand(4);
+ assert(N2.getOpcode() == ISD::Constant);
+ assert(N3.getOpcode() == ISD::Register);
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
+ cast<ConstantSDNode>(N2)->getZExtValue()),
+ MVT::i32);
+ SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag };
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT()) {
+ default: assert(false && "Illegal conditional move type!");
+ break;
+ case MVT::f32:
+ Opc = ARM::FNEGScc;
+ break;
+ case MVT::f64:
+ Opc = ARM::FNEGDcc;
+ break;
+ }
+ return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
+ }
+
+ case ISD::DECLARE: {
+ SDValue Chain = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
+ // FIXME: handle VLAs.
+ if (!FINode) {
+ ReplaceUses(Op.getValue(0), Chain);
+ return NULL;
+ }
+ if (N2.getOpcode() == ARMISD::PIC_ADD && isa<LoadSDNode>(N2.getOperand(0)))
+ N2 = N2.getOperand(0);
+ LoadSDNode *Ld = dyn_cast<LoadSDNode>(N2);
+ if (!Ld) {
+ ReplaceUses(Op.getValue(0), Chain);
+ return NULL;
+ }
+ SDValue BasePtr = Ld->getBasePtr();
+ assert(BasePtr.getOpcode() == ARMISD::Wrapper &&
+ isa<ConstantPoolSDNode>(BasePtr.getOperand(0)) &&
+ "llvm.dbg.variable should be a constantpool node");
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(BasePtr.getOperand(0));
+ GlobalValue *GV = 0;
+ if (CP->isMachineConstantPoolEntry()) {
+ ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)CP->getMachineCPVal();
+ GV = ACPV->getGV();
+ } else
+ GV = dyn_cast<GlobalValue>(CP->getConstVal());
+ if (!GV) {
+ ReplaceUses(Op.getValue(0), Chain);
+ return NULL;
+ }
+
+ SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
+ TLI.getPointerTy());
+ SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy());
+ SDValue Ops[] = { Tmp1, Tmp2, Chain };
+ return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
+ MVT::Other, Ops, 3);
+ }
+ }
+
+ return SelectCode(Op);
+}
+
+bool ARMDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+
+ SDValue Base, Offset, Opc;
+ if (!SelectAddrMode2(Op, Op, Base, Offset, Opc))
+ return true;
+
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ OutOps.push_back(Opc);
+ return false;
+}
+
+/// createARMISelDag - This pass converts a legalized DAG into a
+/// ARM-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createARMISelDag(ARMTargetMachine &TM) {
+ return new ARMDAGToDAGISel(TM);
+}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
new file mode 100644
index 000000000000..c0fd9dcd1072
--- /dev/null
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -0,0 +1,2346 @@
+//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instruction.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM), ARMPCLabelIndex(0) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+
+ if (Subtarget->isTargetDarwin()) {
+ // Uses VFP for Thumb libfuncs if available.
+ if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ // Single-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
+ setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
+ setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
+ setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
+
+ // Double-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
+ setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
+ setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
+ setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
+
+ // Single-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
+ setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
+ setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
+ setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
+ setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
+ setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
+ setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
+ setLibcallName(RTLIB::O_F32, "__unordsf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+
+ // Double-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
+ setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
+ setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
+ setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
+ setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
+ setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
+ setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
+ setLibcallName(RTLIB::O_F64, "__unorddf2vfp");
+
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
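+ // For illustration of how these settings are used (a sketch, not emitted
+ // code): the *_vfp comparison helpers return a nonzero i32 when the
+ // relation holds, so e.g. a soft-float (setolt f32 %a, %b) becomes roughly
+ //   %r = __ltsf2vfp(%a, %b)   ; nonzero iff %a < %b
+ //   %cond = (%r setne 0)      ; the SETNE registered above
+ // while the "ordered" check reuses __unord{s,d}f2vfp and tests with SETEQ,
+ // since a zero result there means the operands are not unordered.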
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
+
+ // Conversions between floating types.
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+ // e.g., __floatunsidf vs. __floatunssidfvfp.
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+ }
+ }
+
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, 0);
+ setLibcallName(RTLIB::SRL_I128, 0);
+ setLibcallName(RTLIB::SRA_I128, 0);
+
+ if (Subtarget->isThumb())
+ addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
+ else
+ addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+ addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
+ addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ }
+ computeRegisterProperties();
+
+ // ARM does not have f32 extending load.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // ARM does not have i1 sign extending load.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // ARM supports all 4 flavors of integer indexed load / store.
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, MVT::i1, Legal);
+ setIndexedLoadAction(im, MVT::i8, Legal);
+ setIndexedLoadAction(im, MVT::i16, Legal);
+ setIndexedLoadAction(im, MVT::i32, Legal);
+ setIndexedStoreAction(im, MVT::i1, Legal);
+ setIndexedStoreAction(im, MVT::i8, Legal);
+ setIndexedStoreAction(im, MVT::i16, Legal);
+ setIndexedStoreAction(im, MVT::i32, Legal);
+ }
+
+ // i64 operation support.
+ if (Subtarget->isThumb()) {
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ } else {
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ }
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL, MVT::i64, Custom);
+ setOperationAction(ISD::SRA, MVT::i64, Custom);
+
+ // ARM does not have ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ if (!Subtarget->hasV5TOps() || Subtarget->isThumb())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ // Only ARMv6 has BSWAP.
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ // These are expanded into libcalls.
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // Support label based line numbers.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ if (!Subtarget->hasV6Ops()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ }
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb())
+ // Turn f64->i64 into FMRRD and i64->f64 into FMDRR iff the target supports VFP2.
+ setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ // We don't support sin/cos/fmod/copysign/pow
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ }
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+
+ // int <-> fp are custom expanded into bit_convert + ARMISD ops.
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ // ARMISD::FMRRD - No need to call setTargetDAGCombine
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
+
+ setStackPointerRegisterToSaveRestore(ARM::SP);
+ setSchedulingPreference(SchedulingForRegPressure);
+ setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10);
+ setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2);
+
+ maxStoresPerMemcpy = 1; // temporary - rewrite interface to use type
+ // Do not enable CodePlacementOpt for now: it currently runs after the
+ // ARMConstantIslandPass and messes up branch relaxation and placement
+ // of constant islands.
+ // benefitFromCodePlacementOpt = true;
+}
+
+const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case ARMISD::Wrapper: return "ARMISD::Wrapper";
+ case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
+ case ARMISD::CALL: return "ARMISD::CALL";
+ case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
+ case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
+ case ARMISD::tCALL: return "ARMISD::tCALL";
+ case ARMISD::BRCOND: return "ARMISD::BRCOND";
+ case ARMISD::BR_JT: return "ARMISD::BR_JT";
+ case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::CMP: return "ARMISD::CMP";
+ case ARMISD::CMPNZ: return "ARMISD::CMPNZ";
+ case ARMISD::CMPFP: return "ARMISD::CMPFP";
+ case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+ case ARMISD::CMOV: return "ARMISD::CMOV";
+ case ARMISD::CNEG: return "ARMISD::CNEG";
+
+ case ARMISD::FTOSI: return "ARMISD::FTOSI";
+ case ARMISD::FTOUI: return "ARMISD::FTOUI";
+ case ARMISD::SITOF: return "ARMISD::SITOF";
+ case ARMISD::UITOF: return "ARMISD::UITOF";
+
+ case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
+ case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
+ case ARMISD::RRX: return "ARMISD::RRX";
+
+ case ARMISD::FMRRD: return "ARMISD::FMRRD";
+ case ARMISD::FMDRR: return "ARMISD::FMDRR";
+
+ case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Lowering Code
+//===----------------------------------------------------------------------===//
+
+/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
+static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code!");
+ case ISD::SETNE: return ARMCC::NE;
+ case ISD::SETEQ: return ARMCC::EQ;
+ case ISD::SETGT: return ARMCC::GT;
+ case ISD::SETGE: return ARMCC::GE;
+ case ISD::SETLT: return ARMCC::LT;
+ case ISD::SETLE: return ARMCC::LE;
+ case ISD::SETUGT: return ARMCC::HI;
+ case ISD::SETUGE: return ARMCC::HS;
+ case ISD::SETULT: return ARMCC::LO;
+ case ISD::SETULE: return ARMCC::LS;
+ }
+}
+
+/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It
+/// returns true if the operands should be inverted to form the proper
+/// comparison.
+static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ ARMCC::CondCodes &CondCode2) {
+ bool Invert = false;
+ CondCode2 = ARMCC::AL;
+ switch (CC) {
+ default: assert(0 && "Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = ARMCC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = ARMCC::GE; break;
+ case ISD::SETOLT: CondCode = ARMCC::MI; break;
+ case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break;
+ case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETO: CondCode = ARMCC::VC; break;
+ case ISD::SETUO: CondCode = ARMCC::VS; break;
+ case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUGT: CondCode = ARMCC::HI; break;
+ case ISD::SETUGE: CondCode = ARMCC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = ARMCC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = ARMCC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ }
+ return Invert;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//
+// The calling convention lowering operations are applied in this order:
+// LowerCALL (virt regs --> phys regs, virt regs --> stack)
+// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs)
+// LowerRET (virt regs --> phys regs)
+// LowerCALL (phys regs --> virt regs)
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMGenCallingConv.inc"
+
+// APCS f64 is in register pairs, possibly split to stack
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+ static const unsigned LoRegList[] = { ARM::R1,
+ ARM::R2,
+ ARM::R3,
+ ARM::NoRegister };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 4; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
+ if (LoRegList[i] != ARM::NoRegister)
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ MVT::i32, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4, 4),
+ MVT::i32, LocInfo));
+ return true; // we handled it
+}
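+ // For example, with the APCS split above: if R0 is the next free register,
+ // the two halves of an f64 land in R0 and R1; if only R3 is left, one half
+ // goes in R3 and the other to a 4-byte stack slot, which is why LoRegList
+ // ends in ARM::NoRegister.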
+
+// AAPCS f64 is in aligned register pairs
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ MVT::i32, LocInfo));
+ return true; // we handled it
+}
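+ // For example, under AAPCS an f64 only ever occupies the aligned pairs
+ // R0:R1 or R2:R3; if neither pair is available this routine returns false
+ // and the argument falls through to the remaining calling-convention rules
+ // (i.e. the stack).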
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
+ static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ MVT::i32, LocInfo));
+ return true; // we handled it
+}
+
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. This returns an SDNode with the same number of values as
+/// the ISD::CALL.
+SDNode *ARMTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG) {
+
+ DebugLoc dl = TheCall->getDebugLoc();
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ bool isVarArg = TheCall->isVarArg();
+ CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_ARM);
+
+ SmallVector<SDValue, 8> ResultVals;
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val;
+ if (VA.needsCustom()) {
+ // Handle f64 as custom.
+ SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ } else {
+ Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ }
+
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ ResultVals.push_back(Val);
+ }
+
+ // Merge everything together with a MERGE_VALUES node.
+ ResultVals.push_back(Chain);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size()).getNode();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*AlwaysInline=*/false, NULL, 0, NULL, 0);
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack.
+SDValue
+ARMTargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
+ const SDValue &StackPtr,
+ const CCValAssign &VA, SDValue Chain,
+ SDValue Arg, ISD::ArgFlagsTy Flags) {
+ DebugLoc dl = TheCall->getDebugLoc();
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ if (Flags.isByVal()) {
+ return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
+ }
+ return DAG.getStore(Chain, dl, Arg, PtrOff,
+ PseudoSourceValue::getStack(), LocMemOffset);
+}
+
+/// LowerCALL - Lower an ISD::CALL node into a callseq_start <-
+/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
+/// nodes.
+SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ MVT RetVT = TheCall->getRetValType(0);
+ SDValue Chain = TheCall->getChain();
+ unsigned CC = TheCall->getCallingConv();
+ assert((CC == CallingConv::C ||
+ CC == CallingConv::Fast) && "unknown calling convention");
+ bool isVarArg = TheCall->isVarArg();
+ SDValue Callee = TheCall->getCallee();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeCallOperands(TheCall, CC_ARM);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+ // of tail call optimization, arguments are handled later.
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = TheCall->getArg(realArgIdx);
+ ISD::ArgFlagsTy Flags = TheCall->getArgFlags(realArgIdx);
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // f64 is passed in i32 pairs and must be combined
+ if (VA.needsCustom()) {
+ SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ if (VA.isRegLoc())
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(1)));
+ else {
+ assert(VA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
+ Chain, fmrrd.getValue(1),
+ Flags));
+ }
+ } else if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
+ Chain, Arg, Flags));
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ bool isDirect = false;
+ bool isARMFunc = false;
+ bool isLocalARMFunc = false;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ GlobalValue *GV = G->getGlobal();
+ isDirect = true;
+ bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage());
+ bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // ARM call to a local ARM function is predicable.
+ isLocalARMFunc = !Subtarget->isThumb() && !isExt;
+ // tBX takes a register source operand.
+ if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) {
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
+ ARMCP::CPStub, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else
+ Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ isDirect = true;
+ bool isStub = Subtarget->isTargetDarwin() &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // tBX takes a register source operand.
+ const char *Sym = S->getSymbol();
+ if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) {
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(Sym, ARMPCLabelIndex,
+ ARMCP::CPStub, 4);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ Callee = DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
+ getPointerTy(), Callee, PICLabel);
+ } else
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ }
+
+ // FIXME: handle tail calls differently.
+ unsigned CallOpc;
+ if (Subtarget->isThumb()) {
+ if (!Subtarget->hasV5TOps() && (!isDirect || isARMFunc))
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+ } else {
+ CallOpc = (isDirect || Subtarget->hasV5TOps())
+ ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
+ : ARMISD::CALL_NOLINK;
+ }
+ if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb()) {
+ // implicit def LR - LR mustn't be allocated as GPR:$dst of CALL_NOLINK
+ Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
+ &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (RetVT != MVT::Other)
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
+ Op.getResNo());
+}
+
+SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
+ // The chain is always operand #0
+ SDValue Chain = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+
+ // CCState - Info about the registers and stack slots.
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+
+ // Analyze return values of ISD::RET.
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_ARM);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0, realRVLocIdx = 0;
+ i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // ISD::RET => ret chain, (regnum1,val1), ...
+ // So i*2+1 index only the regnums
+ SDValue Arg = Op.getOperand(realRVLocIdx*2+1);
+
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
+ // available.
+ if (VA.needsCustom()) {
+ SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
+ Flag);
+ } else
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+
+ // Guarantee that all emitted copies are stuck together,
+ // so the scheduler cannot reorder them apart.
+ Flag = Chain.getValue(1);
+ }
+
+ SDValue result;
+ if (Flag.getNode())
+ result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else // Return Void
+ result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
+
+ return result;
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
+// one of the above-mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing modes. These wrapped nodes will be selected
+// into MOVi.
+static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+ MVT PtrVT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry())
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ else
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model
+SDValue
+ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) {
+ DebugLoc dl = GA->getDebugLoc();
+ MVT PtrVT = getPointerTy();
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
+ PCAdj, "tlsgd", true);
+ SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
+ Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0);
+ SDValue Chain = Argument.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
+
+ // call __tls_get_addr.
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Argument;
+ Entry.Ty = (const Type *) Type::Int32Ty;
+ Args.push_back(Entry);
+ // FIXME: is there useful debug info available here?
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, false, false,
+ CallingConv::C, false,
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ return CallResult.first;
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" or
+// "local exec" model.
+SDValue
+ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) {
+ GlobalValue *GV = GA->getGlobal();
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue Offset;
+ SDValue Chain = DAG.getEntryNode();
+ MVT PtrVT = getPointerTy();
+ // Get the Thread Pointer
+ SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+
+ if (GV->isDeclaration()){
+ // initial exec model
+ unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
+ PCAdj, "gottpoff", true);
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
+ Chain = Offset.getValue(1);
+
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
+
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
+ } else {
+ // local exec model
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff");
+ Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+ Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
+ }
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
+ // TODO: implement the "local dynamic" model
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ // If the relocation model is PIC, use the "General Dynamic" TLS Model,
+ // otherwise use the "Local Exec" TLS Model
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ return LowerToTLSGeneralDynamicModel(GA, DAG);
+ else
+ return LowerToTLSExecModels(GA, DAG);
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) {
+ MVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ if (RelocM == Reloc::PIC_) {
+ bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
+ ARMConstantPoolValue *CPV =
+ new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? "GOTOFF":"GOT");
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ CPAddr, NULL, 0);
+ SDValue Chain = Result.getValue(1);
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
+ if (!UseGOTOFF)
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
+ return Result;
+ } else {
+ SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+ }
+}
+
+/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol
+/// even in non-static mode.
+static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) {
+ // If symbol visibility is hidden, the extra load is not needed if
+ // the symbol is definitely defined in the current translation unit.
+ bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
+ if (GV->hasHiddenVisibility() && (!isDecl && !GV->hasCommonLinkage()))
+ return false;
+ return RelocM != Reloc::Static && (isDecl || GV->isWeakForLinker());
+}
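+// For example, under PIC on Darwin a reference to an external, declared-only
+// global returns true here and is then routed through a non-lazy pointer
+// (ARMCP::CPNonLazyPtr) by LowerGlobalAddressDarwin below.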
+
+SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
+ SelectionDAG &DAG) {
+ MVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ bool IsIndirect = GVIsIndirectSymbol(GV, RelocM);
+ SDValue CPAddr;
+ if (RelocM == Reloc::Static)
+ CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+ else {
+ unsigned PCAdj = (RelocM != Reloc::PIC_)
+ ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ ARMCP::ARMCPKind Kind = IsIndirect ? ARMCP::CPNonLazyPtr
+ : ARMCP::CPValue;
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
+ Kind, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDValue Chain = Result.getValue(1);
+
+ if (RelocM == Reloc::PIC_) {
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ }
+ if (IsIndirect)
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
+
+ return Result;
+}
+
+SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
+ SelectionDAG &DAG){
+ assert(Subtarget->isTargetELF() &&
+ "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
+ MVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue("_GLOBAL_OFFSET_TABLE_",
+ ARMPCLabelIndex,
+ ARMCP::CPValue, PCAdj);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+}
+
+SDValue
+ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::arm_thread_pointer:
+ return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+ case Intrinsic::eh_sjlj_setjmp:
+ SDValue Res = DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32,
+ Op.getOperand(1));
+ return Res;
+ }
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ unsigned VarArgsFrameIndex) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ DebugLoc dl = Op.getDebugLoc();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+}
+
+SDValue
+ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ SDValue Root = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ unsigned CC = MF.getFunction()->getCallingConv();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_ARM);
+
+ SmallVector<SDValue, 16> ArgValues;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC;
+ if (AFI->isThumbFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ if (RegVT == MVT::f64) {
+ // f64 is passed in pairs of GPRs and must be combined.
+ RegVT = MVT::i32;
+ } else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32)))
+ assert(0 && "RegVT not supported by FORMAL_ARGUMENTS Lowering");
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+
+ // f64 is passed in i32 pairs and must be combined.
+ if (VA.needsCustom()) {
+ SDValue ArgValue2;
+
+ VA = ArgLocs[++i]; // skip ahead to next loc
+ if (VA.isMemLoc()) {
+ // must be APCS to split like this
+ unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset());
+
+ // Create load node to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0);
+ } else {
+ Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+ }
+
+ ArgValue = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64,
+ ArgValue, ArgValue2);
+ }
+
+ // If this is an 8 or 16-bit value, it is really passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::ZExt:
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ }
+
+ ArgValues.push_back(ArgValue);
+
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+ assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
+
+ unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset());
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0));
+ }
+ }
+
+ // varargs
+ if (isVarArg) {
+ static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ unsigned NumGPRs = CCInfo.getFirstUnallocated
+ (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned VARegSize = (4 - NumGPRs) * 4;
+ unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
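+ // For illustration, assuming an 8-byte stack alignment: if one GPR already
+ // carries a fixed argument (NumGPRs == 1) then VARegSize is 12 and the
+ // round-up above yields VARegSaveSize == 16.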
+ unsigned ArgOffset = 0;
+ if (VARegSaveSize) {
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by dereferencing
+ // the result of va_next.
+ AFI->setVarArgsRegSaveSize(VARegSaveSize);
+ ArgOffset = CCInfo.getNextStackOffset();
+ VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset +
+ VARegSaveSize - VARegSize);
+ SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+
+ SmallVector<SDValue, 4> MemOps;
+ for (; NumGPRs < 4; ++NumGPRs) {
+ TargetRegisterClass *RC;
+ if (AFI->isThumbFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else
+ // This will point to the next argument passed via stack.
+ VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+/// isFloatingPointZero - Return true if this is +0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isPosZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
+ SDValue WrapperOp = Op.getOperand(1).getOperand(0);
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isPosZero();
+ }
+ }
+ return false;
+}
+
+static bool isLegalCmpImmediate(unsigned C, bool isThumb) {
+ return ( isThumb && (C & ~255U) == 0) ||
+ (!isThumb && ARM_AM::getSOImmVal(C) != -1);
+}
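+// For illustration: Thumb only accepts 8-bit immediates (0..255), while ARM
+// accepts any so_imm, i.e. an 8-bit value rotated right by an even amount;
+// 0x00FF0000 (0xFF ror 16) is legal but 0x00FF00FF is not.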
+
+/// Returns the appropriate ARM CMP (cmp) and corresponding condition code for
+/// the given operands.
+static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG, bool isThumb,
+ DebugLoc dl) {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ unsigned C = RHSC->getZExtValue();
+ if (!isLegalCmpImmediate(C, isThumb)) {
+ // Constant does not fit, try adjusting it by one?
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if (isLegalCmpImmediate(C-1, isThumb)) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if (C > 0 && isLegalCmpImmediate(C-1, isThumb)) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if (isLegalCmpImmediate(C+1, isThumb)) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb)) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ }
+ }
+ }
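+ // For illustration: "x < 257" cannot encode 257 as a so_imm, but x < 257 is
+ // equivalent to x <= 256 for integers, so SETLT/257 is rewritten above to
+ // SETLE/256, and 256 (1 ror 24) encodes fine.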
+
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMISD::NodeType CompareType;
+ switch (CondCode) {
+ default:
+ CompareType = ARMISD::CMP;
+ break;
+ case ARMCC::EQ:
+ case ARMCC::NE:
+ case ARMCC::MI:
+ case ARMCC::PL:
+ // Uses only N and Z Flags
+ CompareType = ARMISD::CMPNZ;
+ break;
+ }
+ ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
+}
+
+/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
+static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue Cmp;
+ if (!isFloatingPointZero(RHS))
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
+ else
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
+ return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
+}
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ MVT VT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMCC;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ if (FPCCToARMCC(CC, CondCode, CondCode2))
+ std::swap(TrueVal, FalseVal);
+
+ SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
+ ARMCC, CCR, Cmp);
+ if (CondCode2 != ARMCC::AL) {
+ SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
+ // FIXME: Needs another CMP because flag can have but one use.
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ Result = DAG.getNode(ARMISD::CMOV, dl, VT,
+ Result, TrueVal, ARMCC2, CCR, Cmp2);
+ }
+ return Result;
+}
+
+static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDValue ARMCC;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMCC, CCR,Cmp);
+ }
+
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+ ARMCC::CondCodes CondCode, CondCode2;
+ if (FPCCToARMCC(CC, CondCode, CondCode2))
+ // Swap the LHS/RHS of the comparison if needed.
+ std::swap(LHS, RHS);
+
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ if (CondCode2 != ARMCC::AL) {
+ ARMCC = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
+ Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ }
+ return Res;
+}
+
+SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ MVT PTy = getPointerTy();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+ SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
+ Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
+ Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+ bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ Addr = DAG.getLoad(isPIC ? (MVT)MVT::i32 : PTy, dl,
+ Chain, Addr, NULL, 0);
+ Chain = Addr.getValue(1);
+ if (isPIC)
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
+ return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
+}
+
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
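+ // The conversion happens inside an FP register (FTOSI / FTOUI produce the
+ // integer bits as an f32 value), so bit-convert the result over to i32.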
+ unsigned Opc =
+ Op.getOpcode() == ISD::FP_TO_SINT ? ARMISD::FTOSI : ARMISD::FTOUI;
+ Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+}
+
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc =
+ Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF;
+
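+ // Move the integer bits into an FP register first; SITOF / UITOF then
+ // convert within the FP register file.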
+ Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
+
+static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+ // Implement fcopysign with a fabs and a conditional fneg.
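+ // The result is negated only when the sign source (Tmp1) compares less
+ // than zero, which is what the CNEG below selects on.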
+ SDValue Tmp0 = Op.getOperand(0);
+ SDValue Tmp1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType();
+ MVT SrcVT = Tmp1.getValueType();
+ SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
+ SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
+ SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
+}
+
+SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = (Subtarget->isThumb() || Subtarget->useThumbBacktraces())
+ ? ARM::R7 : ARM::R11;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
+ return FrameAddr;
+}
+
+SDValue
+ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff){
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ MVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+ // same number of stores. The loads and stores will get combined into
+ // ldm/stm later on.
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcSV, SrcSVOff + SrcOff);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstSV, DstSVOff + DstOff);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 to 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcSV, SrcSVOff + SrcOff);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstSV, DstSVOff + DstOff);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
+
+static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) == MVT::f64) {
+ // Turn i64->f64 into FMDRR.
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ }
+
+ // Turn f64->i64 into FMRRD.
+ SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
+}
+
+static SDValue ExpandSRx(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) {
+ assert(N->getValueType(0) == MVT::i64 &&
+ (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ "Unknown shift to lower!");
+
+ // We only lower SRA, SRL of 1 here, all others use generic lowering.
+ if (!isa<ConstantSDNode>(N->getOperand(1)) ||
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
+ return SDValue();
+
+ // If we are in thumb mode, we don't have RRX.
+ if (ST->isThumb()) return SDValue();
+
+ // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(1, MVT::i32));
+
+ // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
+ // captures the result into a carry flag.
+ unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
+
+ // The low part is an ARMISD::RRX operand, which shifts the carry in.
+ Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
+
+SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Don't know how to custom lower this!"); abort();
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress:
+ return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
+ LowerGlobalAddressELF(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::RETURNADDR: break;
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG);
+ case ISD::SRL:
+ case ISD::SRA: return ExpandSRx(Op.getNode(), DAG,Subtarget);
+ }
+ return SDValue();
+}
+
+/// ReplaceNodeResults - Replace the results of node with an illegal result
+/// type with new values built out of custom code.
+void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
+ switch (N->getOpcode()) {
+ default:
+ assert(0 && "Don't know how to custom expand this!");
+ return;
+ case ISD::BIT_CONVERT:
+ Results.push_back(ExpandBIT_CONVERT(N, DAG));
+ return;
+ case ISD::SRL:
+ case ISD::SRA: {
+ SDValue Res = ExpandSRx(N, DAG, Subtarget);
+ if (Res.getNode())
+ Results.push_back(Res);
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+ case ARM::tMOVCCr: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+ .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+ e = BB->succ_end(); i != e; ++i)
+ sinkMBB->addSuccessor(*i);
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while(!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+static
+SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT VT = N->getValueType(0);
+ unsigned Opc = N->getOpcode();
+ bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
+ SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
+ SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+
+ if (isSlctCC) {
+ CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
+ } else {
+ SDValue CCOp = Slct.getOperand(0);
+ if (CCOp.getOpcode() == ISD::SETCC)
+ CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
+ }
+
+ bool DoXform = false;
+ bool InvCC = false;
+ assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
+ "Bad input!");
+
+ if (LHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(LHS)->isNullValue()) {
+ DoXform = true;
+ } else if (CC != ISD::SETCC_INVALID &&
+ RHS.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(RHS)->isNullValue()) {
+ std::swap(LHS, RHS);
+ SDValue Op0 = Slct.getOperand(0);
+ MVT OpVT = isSlctCC ? Op0.getValueType() :
+ Op0.getOperand(0).getValueType();
+ bool isInt = OpVT.isInteger();
+ CC = ISD::getSetCCInverse(CC, isInt);
+
+ if (!TLI.isCondCodeLegal(CC, OpVT))
+ return SDValue(); // Inverse operator isn't legal.
+
+ DoXform = true;
+ InvCC = true;
+ }
+
+ if (DoXform) {
+ SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
+ if (isSlctCC)
+ return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
+ Slct.getOperand(0), Slct.getOperand(1), CC);
+ SDValue CCOp = Slct.getOperand(0);
+ if (InvCC)
+ CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
+ CCOp.getOperand(0), CCOp.getOperand(1), CC);
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ CCOp, OtherOp, Result);
+ }
+ return SDValue();
+}
+
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Added by Evan in r37685 with no testcase.
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+ // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
+ if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
+ if (Result.getNode()) return Result;
+ }
+ if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+static SDValue PerformSUBCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Added by Evan in r37685 with no testcase.
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+ // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
+ if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+ SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+ if (Result.getNode()) return Result;
+ }
+
+ return SDValue();
+}
+
+
+/// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD.
+static SDValue PerformFMRRDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // fmrrd(fmdrr x, y) -> x,y
+ SDValue InDouble = N->getOperand(0);
+ if (InDouble.getOpcode() == ARMISD::FMDRR)
+ return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
+ return SDValue();
+}
+
+SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD: return PerformADDCombine(N, DCI);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI);
+ }
+
+ return SDValue();
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+static bool isLegalAddressImmediate(int64_t V, MVT VT,
+ const ARMSubtarget *Subtarget) {
+ if (V == 0)
+ return true;
+
+ if (!VT.isSimple())
+ return false;
+
+ if (Subtarget->isThumb()) {
+ if (V < 0)
+ return false;
+
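+ // Thumb load / store offsets are unsigned, scaled by the access size,
+ // and must fit in 5 bits after scaling.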
+ unsigned Scale = 1;
+ switch (VT.getSimpleVT()) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ // Scale == 1;
+ break;
+ case MVT::i16:
+ // Scale == 2;
+ Scale = 2;
+ break;
+ case MVT::i32:
+ // Scale == 4;
+ Scale = 4;
+ break;
+ }
+
+ if ((V & (Scale - 1)) != 0)
+ return false;
+ V /= Scale;
+ return V == (V & ((1LL << 5) - 1));
+ }
+
+ if (V < 0)
+ V = - V;
+ switch (VT.getSimpleVT()) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ // +- imm12
+ return V == (V & ((1LL << 12) - 1));
+ case MVT::i16:
+ // +- imm8
+ return V == (V & ((1LL << 8) - 1));
+ case MVT::f32:
+ case MVT::f64:
+ if (!Subtarget->hasVFP2())
+ return false;
+ if ((V & 3) != 0)
+ return false;
+ V >>= 2;
+ return V == (V & ((1LL << 8) - 1));
+ }
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ MVT VT = getValueType(Ty, true);
+ if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
+ return false;
+
+ // Can never fold addr of global into load/store.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // no scale reg, must be "r+i" or "r", or "i".
+ break;
+ case 1:
+ if (Subtarget->isThumb())
+ return false;
+ // FALL THROUGH.
+ default:
+ // ARM doesn't support any R+R*scale+imm addr modes.
+ if (AM.BaseOffs)
+ return false;
+
+ if (!VT.isSimple())
+ return false;
+
+ int Scale = AM.Scale;
+ switch (VT.getSimpleVT()) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i32:
+ case MVT::i64:
+ // This assumes i64 is legalized to a pair of i32. If not (i.e. if
+ // ldrd / strd are used), then its address mode is the same as for i16.
+ // r + r
+ if (Scale < 0) Scale = -Scale;
+ if (Scale == 1)
+ return true;
+ // r + r << imm
+ return isPowerOf2_32(Scale & ~1);
+ case MVT::i16:
+ // r + r
+ if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ return true;
+ return false;
+
+ case MVT::isVoid:
+ // Note, we allow "void" uses (basically, uses that aren't loads or
+ // stores), because ARM allows folding a scale into many arithmetic
+ // operations. This should be made more precise and revisited later.
+
+ // Allow r << imm, but the imm has to be a multiple of two.
+ if (AM.Scale & 1) return false;
+ return isPowerOf2_32(AM.Scale);
+ }
+ break;
+ }
+ return true;
+}
+
+static bool getIndexedAddressParts(SDNode *Ptr, MVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
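+ // Halfword accesses and sign-extending byte loads use addressing mode 3,
+ // which only has an 8-bit immediate offset; word and byte accesses use
+ // addressing mode 2 with a 12-bit immediate offset.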
+ if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
+ // AddressingMode 3
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -256) {
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Offset = Ptr->getOperand(1);
+ return true;
+ } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
+ // AddressingMode 2
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC < 0 && RHSC > -0x1000) {
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ Base = Ptr->getOperand(0);
+ return true;
+ }
+ }
+
+ if (Ptr->getOpcode() == ISD::ADD) {
+ isInc = true;
+ ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ Base = Ptr->getOperand(1);
+ Offset = Ptr->getOperand(0);
+ } else {
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ }
+ return true;
+ }
+
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ return true;
+ }
+
+ // FIXME: Use FLDM / FSTM to emulate indexed FP load / store.
+ return false;
+}
+
+/// getPreIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as pre-indexed load / store address.
+bool
+ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb())
+ return false;
+
+ MVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = getIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (isLegal) {
+ AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
+ return true;
+ }
+ return false;
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isThumb())
+ return false;
+
+ MVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (isLegal) {
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+ }
+ return false;
+}
+
+void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case ARMISD::CMOV: {
+ // Bits are known zero/one if known on the LHS and RHS.
+ DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ if (KnownZero == 0 && KnownOne == 0) return;
+
+ APInt KnownZeroRHS, KnownOneRHS;
+ DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
+ KnownZeroRHS, KnownOneRHS, Depth+1);
+ KnownZero &= KnownZeroRHS;
+ KnownOne &= KnownOneRHS;
+ return;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+ARMTargetLowering::ConstraintType
+ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'l': return C_RegisterClass;
+ case 'w': return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC ARM Constraint Letters
+ switch (Constraint[0]) {
+ case 'l':
+ if (Subtarget->isThumb())
+ return std::make_pair(0U, ARM::tGPRRegisterClass);
+ else
+ return std::make_pair(0U, ARM::GPRRegisterClass);
+ case 'r':
+ return std::make_pair(0U, ARM::GPRRegisterClass);
+ case 'w':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, ARM::SPRRegisterClass);
+ if (VT == MVT::f64)
+ return std::make_pair(0U, ARM::DPRRegisterClass);
+ break;
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+std::vector<unsigned> ARMTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) { // GCC ARM Constraint Letters
+ default: break;
+ case 'l':
+ return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ 0);
+ case 'r':
+ return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+ ARM::R12, ARM::LR, 0);
+ case 'w':
+ if (VT == MVT::f32)
+ return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12,ARM::S13,ARM::S14,ARM::S15,
+ ARM::S16,ARM::S17,ARM::S18,ARM::S19,
+ ARM::S20,ARM::S21,ARM::S22,ARM::S23,
+ ARM::S24,ARM::S25,ARM::S26,ARM::S27,
+ ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
+ if (VT == MVT::f64)
+ return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+ ARM::D8, ARM::D9, ARM::D10,ARM::D11,
+ ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
+ break;
+ }
+
+ return std::vector<unsigned>();
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ char Constraint,
+ bool hasMemory,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ switch (Constraint) {
+ default: break;
+ case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O':
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ int64_t CVal64 = C->getSExtValue();
+ int CVal = (int) CVal64;
+ // None of these constraints allow values larger than 32 bits. Check
+ // that the value fits in an int.
+ if (CVal != CVal64)
+ return;
+
+ switch (Constraint) {
+ case 'I':
+ if (Subtarget->isThumb()) {
+ // This must be a constant between 0 and 255, for ADD immediates.
+ if (CVal >= 0 && CVal <= 255)
+ break;
+ } else {
+ // A constant that can be used as an immediate value in a
+ // data-processing instruction.
+ if (ARM_AM::getSOImmVal(CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'J':
+ if (Subtarget->isThumb()) {
+ // This must be a constant between -255 and -1, for negated ADD
+ // immediates. This can be used in GCC with an "n" modifier that
+ // prints the negated value, for use with SUB instructions. It is
+ // not useful otherwise but is implemented for compatibility.
+ if (CVal >= -255 && CVal <= -1)
+ break;
+ } else {
+ // This must be a constant between -4095 and 4095. It is not clear
+ // what this constraint is intended for. Implemented for
+ // compatibility with GCC.
+ if (CVal >= -4095 && CVal <= 4095)
+ break;
+ }
+ return;
+
+ case 'K':
+ if (Subtarget->isThumb()) {
+ // A 32-bit value where only one byte has a nonzero value. Exclude
+ // zero to match GCC. This constraint is used by GCC internally for
+ // constants that can be loaded with a move/shift combination.
+ // It is not useful otherwise but is implemented for compatibility.
+ if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
+ break;
+ } else {
+ // A constant whose bitwise inverse can be used as an immediate
+ // value in a data-processing instruction. This can be used in GCC
+ // with a "B" modifier that prints the inverted value, for use with
+ // BIC and MVN instructions. It is not useful otherwise but is
+ // implemented for compatibility.
+ if (ARM_AM::getSOImmVal(~CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'L':
+ if (Subtarget->isThumb()) {
+ // This must be a constant between -7 and 7,
+ // for 3-operand ADD/SUB immediate instructions.
+ if (CVal >= -7 && CVal < 7)
+ break;
+ } else {
+ // A constant whose negation can be used as an immediate value in a
+ // data-processing instruction. This can be used in GCC with an "n"
+ // modifier that prints the negated value, for use with SUB
+ // instructions. It is not useful otherwise but is implemented for
+ // compatibility.
+ if (ARM_AM::getSOImmVal(-CVal) != -1)
+ break;
+ }
+ return;
+
+ case 'M':
+ if (Subtarget->isThumb()) {
+ // This must be a multiple of 4 between 0 and 1020, for
+ // ADD sp + immediate.
+ if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
+ break;
+ } else {
+ // A power of two or a constant between 0 and 32. This is used in
+ // GCC for the shift amount on shifted register operands, but it is
+ // useful in general for any shift amounts.
+ if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
+ break;
+ }
+ return;
+
+ case 'N':
+ if (Subtarget->isThumb()) {
+ // This must be a constant between 0 and 31, for shift amounts.
+ if (CVal >= 0 && CVal <= 31)
+ break;
+ }
+ return;
+
+ case 'O':
+ if (Subtarget->isThumb()) {
+ // This must be a multiple of 4 between -508 and 508, for
+ // ADD/SUB sp = sp + immediate.
+ if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
+ break;
+ }
+ return;
+ }
+ Result = DAG.getTargetConstant(CVal, Op.getValueType());
+ break;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
+ Ops, DAG);
+}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
new file mode 100644
index 000000000000..2dab2dbba663
--- /dev/null
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -0,0 +1,184 @@
+//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMISELLOWERING_H
+#define ARMISELLOWERING_H
+
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include <vector>
+
+namespace llvm {
+ class ARMConstantPoolValue;
+
+ namespace ARMISD {
+ // ARM Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
+ // TargetExternalSymbol, and TargetGlobalAddress.
+ WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
+
+ CALL, // Function call.
+ CALL_PRED, // Function call that's predicable.
+ CALL_NOLINK, // Function call with a branch, not a branch-and-link.
+ tCALL, // Thumb function call.
+ BRCOND, // Conditional branch.
+ BR_JT, // Jumptable branch.
+ RET_FLAG, // Return with a flag operand.
+
+ PIC_ADD, // Add with a PC operand and a PIC label.
+
+ CMP, // ARM compare instructions.
+ CMPNZ, // ARM compare that uses only N or Z flags.
+ CMPFP, // ARM VFP compare instruction, sets FPSCR.
+ CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
+ FMSTAT, // ARM fmstat instruction.
+ CMOV, // ARM conditional move instructions.
+ CNEG, // ARM conditional negate instructions.
+
+ FTOSI, // FP to sint within a FP register.
+ FTOUI, // FP to uint within a FP register.
+ SITOF, // sint to FP within a FP register.
+ UITOF, // uint to FP within a FP register.
+
+ SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+ SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+ RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+
+ FMRRD, // Double to two GPRs.
+ FMDRR, // Two GPRs to double.
+
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp
+
+ THREAD_POINTER
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // ARMTargetLowering - ARM Implementation of the TargetLowering interface
+
+ class ARMTargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ public:
+ explicit ARMTargetLowering(TargetMachine &TM);
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG);
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+ /// getPreIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if the node's address
+ /// can be legally represented as pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ /// getPostIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if this node can be
+ /// combined with a load / store to form a post-indexed load / store.
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const;
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+ /// true it means one of the asm constraint of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ virtual const ARMSubtarget* getSubtarget() {
+ return Subtarget;
+ }
+
+ private:
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// ARMPCLabelIndex - Keep track the number of ARM PC labels created.
+ ///
+ unsigned ARMPCLabelIndex;
+
+ SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
+ const SDValue &StackPtr, const CCValAssign &VA,
+ SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags);
+ SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG);
+ SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG);
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff);
+ };
+}
+
+#endif // ARMISELLOWERING_H
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
new file mode 100644
index 000000000000..9a1e1c2bb756
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -0,0 +1,868 @@
+//===- ARMInstrFormats.td - ARM Instruction Formats --*- tablegen -*---------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// ARM Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<5> val> {
+ bits<5> Value = val;
+}
+
+def Pseudo : Format<0>;
+def MulFrm : Format<1>;
+def BrFrm : Format<2>;
+def BrMiscFrm : Format<3>;
+
+def DPFrm : Format<4>;
+def DPSoRegFrm : Format<5>;
+
+def LdFrm : Format<6>;
+def StFrm : Format<7>;
+def LdMiscFrm : Format<8>;
+def StMiscFrm : Format<9>;
+def LdStMulFrm : Format<10>;
+
+def ArithMiscFrm : Format<11>;
+def ExtFrm : Format<12>;
+
+def VFPUnaryFrm : Format<13>;
+def VFPBinaryFrm : Format<14>;
+def VFPConv1Frm : Format<15>;
+def VFPConv2Frm : Format<16>;
+def VFPConv3Frm : Format<17>;
+def VFPConv4Frm : Format<18>;
+def VFPConv5Frm : Format<19>;
+def VFPLdStFrm : Format<20>;
+def VFPLdStMulFrm : Format<21>;
+def VFPMiscFrm : Format<22>;
+
+def ThumbFrm : Format<23>;
+
+// Misc flag for data processing instructions that indicates whether
+// the instruction has an Rn register operand.
+class UnaryDP { bit isUnaryDataProc = 1; }
+
+//===----------------------------------------------------------------------===//
+
+// ARM Instruction templates.
+//
+
+class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im,
+ Format f, string cstr>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "ARM";
+
+ // TSFlagsFields
+ AddrMode AM = am;
+ bits<4> AddrModeBits = AM.Value;
+
+ SizeFlagVal SZ = sz;
+ bits<3> SizeFlag = SZ.Value;
+
+ IndexMode IM = im;
+ bits<2> IndexModeBits = IM.Value;
+
+ Format F = f;
+ bits<5> Form = F.Value;
+
+ //
+ // Attributes specific to ARM instructions...
+ //
+ bit isUnaryDataProc = 0;
+
+ let Constraints = cstr;
+}
+
+class PseudoInst<dag oops, dag iops, string asm, list<dag> pattern>
+ : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, ""> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+}
+
+// Almost all ARM instructions are predicable.
+class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, cstr> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ops pred:$p));
+ let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Same as I except it can optionally modify CPSR. Note it's modeled as
+// an input operand since by default it's a zero register. It will
+// become an implicit def once it's "flipped".
+class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, cstr> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ops pred:$p, cc_out:$s));
+ let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+// Special cases
+class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, im, f, cstr> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
+}
+
+class AI<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern>;
+class AsI<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern>;
+class AXI<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, asm,
+ "", pattern>;
+
+// Ctrl flow instructions
+class ABI<bits<4> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, opc,
+ asm, "", pattern> {
+ let Inst{27-24} = opcod;
+}
+class ABXI<bits<4> opcod, dag oops, dag iops, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, asm,
+ "", pattern> {
+ let Inst{27-24} = opcod;
+}
+class ABXIx2<dag oops, dag iops, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, BrMiscFrm, asm,
+ "", pattern>;
+
+// BR_JT instructions
+class JTI<dag oops, dag iops, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm,
+ asm, "", pattern>;
+
+// addrmode1 instructions
+class AI1<bits<4> opcod, dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = {0,0};
+}
+class AsI1<bits<4> opcod, dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = {0,0};
+}
+class AXI1<bits<4> opcod, dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, asm,
+ "", pattern> {
+ let Inst{24-21} = opcod;
+ let Inst{27-26} = {0,0};
+}
+class AI1x2<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode1, Size8Bytes, IndexModeNone, f, opc,
+ asm, "", pattern>;
+
+
+// addrmode2 loads and stores
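+// In the encodings below, the L bit selects load vs. store, the B bit byte
+// vs. word, the P bit offset / pre-indexed vs. post-indexed addressing, and
+// the W bit base-register writeback.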
+class AI2<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{27-26} = {0,1};
+}
+
+// loads
+class AI2ldw<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AXI2ldw<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AI2ldb<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AXI2ldb<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+
+// stores
+class AI2stw<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AXI2stw<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AI2stb<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AXI2stb<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+
+// Pre-indexed loads
+class AI2ldwpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AI2ldbpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+
+// Pre-indexed stores
+class AI2stwpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AI2stbpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+}
+
+// Post-indexed loads
+class AI2ldwpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AI2ldbpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-26} = {0,1};
+}
+
+// Post-indexed stores
+class AI2stwpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-26} = {0,1};
+}
+class AI2stbpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 1; // B bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-26} = {0,1};
+}
+
+// addrmode3 instructions
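+// In addressing mode 3 the S and H bits (together with L) select among the
+// halfword, signed-byte, signed-halfword, and doubleword forms; the P and W
+// bits have the same pre / post-indexing and writeback meanings as above.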
+class AI3<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern>;
+class AXI3<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, asm,
+ "", pattern>;
+
+// loads
+class AI3ldh<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AXI3ldh<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AI3ldsh<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AXI3ldsh<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AI3ldsb<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AXI3ldsb<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AI3ldd<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+
+// stores
+class AI3sth<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AXI3sth<dag oops, dag iops, Format f, string asm,
+ list<dag> pattern>
+ : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AI3std<dag oops, dag iops, Format f, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc,
+ asm, "", pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 1; // P bit
+}
+
+// Pre-indexed loads
+class AI3ldhpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AI3ldshpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+}
+class AI3ldsbpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+}
+
+// Pre-indexed stores
+class AI3sthpr<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc,
+ asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+}
+
+// Post-indexed loads
+class AI3ldhpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 0; // P bit
+}
+class AI3ldshpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 0; // P bit
+}
+class AI3ldsbpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 0; // P bit
+}
+
+// Post-indexed stores
+class AI3sthpo<dag oops, dag iops, Format f, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc,
+ asm, cstr,pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 0; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 0; // P bit
+}
+
+
+// addrmode4 instructions
+class AXI4ld<dag oops, dag iops, Format f, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, asm,
+ "", pattern> {
+ let Inst{20} = 1; // L bit
+ let Inst{22} = 0; // S bit
+ let Inst{27-25} = 0b100;
+}
+class AXI4st<dag oops, dag iops, Format f, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, asm,
+ "", pattern> {
+ let Inst{20} = 0; // L bit
+ let Inst{22} = 0; // S bit
+ let Inst{27-25} = 0b100;
+}
+
+// Unsigned multiply, multiply-accumulate instructions.
+class AMul1I<bits<7> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc,
+ asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{20} = 0; // S bit
+ let Inst{27-21} = opcod;
+}
+class AsMul1I<bits<7> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc,
+ asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{27-21} = opcod;
+}
+
+// Most significant word multiply
+class AMul2I<bits<7> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc,
+ asm, "", pattern> {
+ let Inst{7-4} = 0b1001;
+ let Inst{20} = 1;
+ let Inst{27-21} = opcod;
+}
+
+// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<y>
+class AMulxyI<bits<7> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc,
+ asm, "", pattern> {
+ let Inst{4} = 0;
+ let Inst{7} = 1;
+ let Inst{20} = 0;
+ let Inst{27-21} = opcod;
+}
+
+// Extend instructions.
+class AExtI<bits<8> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, opc,
+ asm, "", pattern> {
+ let Inst{7-4} = 0b0111;
+ let Inst{27-20} = opcod;
+}
+
+// Misc Arithmetic instructions.
+class AMiscA1I<bits<8> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, opc,
+ asm, "", pattern> {
+ let Inst{27-20} = opcod;
+}
+
+//===----------------------------------------------------------------------===//
+
+// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode.
+class ARMPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM];
+}
+class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV5TE];
+}
+class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV6];
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Thumb Instruction Format Definitions.
+//
+
+
+// TI - Thumb instruction.
+
+class ThumbI<dag outs, dag ins, AddrMode am, SizeFlagVal sz,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+class TI<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>;
+class TI1<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeT1, Size2Bytes, asm, "", pattern>;
+class TI2<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeT2, Size2Bytes, asm, "", pattern>;
+class TI4<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeT4, Size2Bytes, asm, "", pattern>;
+class TIs<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeTs, Size2Bytes, asm, "", pattern>;
+
+// Two-address instructions
+class TIt<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>;
+
+// BL, BLX(1) are translated by the assembler into two instructions.
+class TIx2<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeNone, Size4Bytes, asm, "", pattern>;
+
+// BR_JT instructions
+class TJTI<dag outs, dag ins, string asm, list<dag> pattern>
+ : ThumbI<outs, ins, AddrModeNone, SizeSpecial, asm, "", pattern>;
+
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM VFP Instruction templates.
+//
+
+// ARM VFP addrmode5 loads and stores
+class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStFrm, opc, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-8} = 0b1011;
+}
+
+class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStFrm, opc, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-24} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-8} = 0b1010;
+}
+
+// Load / store multiple
+class AXSI5<dag oops, dag iops, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStMulFrm, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-8} = 0b1011;
+}
+
+class AXDI5<dag oops, dag iops, string asm, list<dag> pattern>
+ : XI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ VFPLdStMulFrm, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+ let Inst{27-25} = 0b110;
+ let Inst{11-8} = 0b1010;
+}
+
+
+// Double precision, unary
+class ADuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : AI<oops, iops, VFPUnaryFrm, opc, asm, pattern> {
+ let Inst{27-20} = opcod1;
+ let Inst{19-16} = opcod2;
+ let Inst{11-8} = 0b1011;
+ let Inst{7-4} = opcod3;
+}
+
+// Double precision, binary
+class ADbI<bits<8> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : AI<oops, iops, VFPBinaryFrm, opc, asm, pattern> {
+ let Inst{27-20} = opcod;
+ let Inst{11-8} = 0b1011;
+}
+
+// Single precision, unary
+class ASuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : AI<oops, iops, VFPUnaryFrm, opc, asm, pattern> {
+ // Bits 22 (D bit) and 5 (M bit) will be changed during instruction encoding.
+ let Inst{27-20} = opcod1;
+ let Inst{19-16} = opcod2;
+ let Inst{11-8} = 0b1010;
+ let Inst{7-4} = opcod3;
+}
+
+// Single precision, binary
+class ASbI<bits<8> opcod, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : AI<oops, iops, VFPBinaryFrm, opc, asm, pattern> {
+ // Bit 22 (D bit) can be changed during instruction encoding.
+ let Inst{27-20} = opcod;
+ let Inst{11-8} = 0b1010;
+}
+
+// VFP conversion instructions
+class AVConv1I<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3,
+ dag oops, dag iops, string opc, string asm, list<dag> pattern>
+ : AI<oops, iops, VFPConv1Frm, opc, asm, pattern> {
+ let Inst{27-20} = opcod1;
+ let Inst{19-16} = opcod2;
+ let Inst{11-8} = opcod3;
+ let Inst{6} = 1;
+}
+
+class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f,
+ string opc, string asm, list<dag> pattern>
+ : AI<oops, iops, f, opc, asm, pattern> {
+ let Inst{27-20} = opcod1;
+ let Inst{11-8} = opcod2;
+ let Inst{4} = 1;
+}
+
+class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, opc, asm, pattern>;
+
+class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, opc, asm, pattern>;
+
+class AVConv4I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv4Frm, opc, asm, pattern>;
+
+class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc,
+ string asm, list<dag> pattern>
+ : AVConvXI<opcod1, opcod2, oops, iops, VFPConv5Frm, opc, asm, pattern>;
+
+//===----------------------------------------------------------------------===//
+
+
+// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode.
+class ThumbPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb];
+}
+
+class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, HasV5T];
+}
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
new file mode 100644
index 000000000000..4b0dbb5dacdb
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -0,0 +1,1025 @@
+//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstrInfo.h"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMGenInstrInfo.inc"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
+ cl::desc("Enable ARM 2-addr to 3-addr conv"));
+
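+// AddDefaultPred - Append the default predicate operands to the instruction
+// being built: the always-execute condition (ARMCC::AL) and a null CC register.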
+static inline
+const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
+ return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
+}
+
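+// AddDefaultCC - Append a null register for the optional cc_out (S bit)
+// operand, i.e. the instruction does not update CPSR.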
+static inline
+const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+ : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
+ RI(*this, STI) {
+}
+
+
+/// Return true if the instruction is a register to register move and
+/// return the source and dest registers (and their sub-register indices)
+/// through the passed-in parameters.
+///
+bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+ unsigned oc = MI.getOpcode();
+ switch (oc) {
+ default:
+ return false;
+ case ARM::FCPYS:
+ case ARM::FCPYD:
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ case ARM::MOVr:
+ case ARM::tMOVr:
+ case ARM::tMOVhir2lor:
+ case ARM::tMOVlor2hir:
+ case ARM::tMOVhir2hir:
+ assert(MI.getDesc().getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "Invalid ARM MOV instruction");
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ }
+}
+
+unsigned ARMInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDR:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::FLDD:
+ case ARM::FLDS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::tRestore:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned ARMInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::STR:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::FSTD:
+ case ARM::FSTS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::tSpill:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+void ARMInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ const MachineInstr *Orig) const {
+ DebugLoc dl = Orig->getDebugLoc();
+ if (Orig->getOpcode() == ARM::MOVi2pieces) {
+ RI.emitLoadConstPool(MBB, I, DestReg, Orig->getOperand(1).getImm(),
+ Orig->getOperand(2).getImm(),
+ Orig->getOperand(3).getReg(), this, false, dl);
+ return;
+ }
+
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+}
+
+static unsigned getUnindexedOpcode(unsigned Opc) {
+ switch (Opc) {
+ default: break;
+ case ARM::LDR_PRE:
+ case ARM::LDR_POST:
+ return ARM::LDR;
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ return ARM::LDRH;
+ case ARM::LDRB_PRE:
+ case ARM::LDRB_POST:
+ return ARM::LDRB;
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ return ARM::LDRSH;
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ return ARM::LDRSB;
+ case ARM::STR_PRE:
+ case ARM::STR_POST:
+ return ARM::STR;
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ return ARM::STRH;
+ case ARM::STRB_PRE:
+ case ARM::STRB_POST:
+ return ARM::STRB;
+ }
+ return 0;
+}
+
+MachineInstr *
+ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ if (!EnableARM3Addr)
+ return NULL;
+
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ unsigned TSFlags = MI->getDesc().TSFlags;
+ bool isPre = false;
+ switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+ default: return NULL;
+ case ARMII::IndexModePre:
+ isPre = true;
+ break;
+ case ARMII::IndexModePost:
+ break;
+ }
+
+ // Try splitting an indexed load/store to an un-indexed one plus an add/sub
+ // operation.
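+ // For example, a post-indexed "ldr r0, [r1], #4" becomes a plain
+ // "ldr r0, [r1]" followed by an "add r1, r1, #4".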
+ unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+ if (MemOpc == 0)
+ return NULL;
+
+ MachineInstr *UpdateMI = NULL;
+ MachineInstr *MemMI = NULL;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned NumOps = TID.getNumOperands();
+ bool isLoad = !TID.mayStore();
+ const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
+ const MachineOperand &Base = MI->getOperand(2);
+ const MachineOperand &Offset = MI->getOperand(NumOps-3);
+ unsigned WBReg = WB.getReg();
+ unsigned BaseReg = Base.getReg();
+ unsigned OffReg = Offset.getReg();
+ unsigned OffImm = MI->getOperand(NumOps-2).getImm();
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
+ switch (AddrMode) {
+ default:
+ assert(false && "Unknown indexed op!");
+ return NULL;
+ case ARMII::AddrMode2: {
+ bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ if (OffReg == 0) {
+ int SOImmVal = ARM_AM::getSOImmVal(Amt);
+ if (SOImmVal == -1)
+ // Can't encode it in a so_imm operand. This transformation will
+ // add more than 1 instruction. Abandon!
+ return NULL;
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(SOImmVal)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else if (Amt != 0) {
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
+ .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ case ARMII::AddrMode3 : {
+ bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM3Offset(OffImm);
+ if (OffReg == 0)
+ // Immediate is 8 bits. It's guaranteed to fit in a so_imm operand.
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ }
+
+ std::vector<MachineInstr*> NewMIs;
+ if (isPre) {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ NewMIs.push_back(MemMI);
+ NewMIs.push_back(UpdateMI);
+ } else {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ if (WB.isDead())
+ UpdateMI->getOperand(0).setIsDead();
+ NewMIs.push_back(UpdateMI);
+ NewMIs.push_back(MemMI);
+ }
+
+ // Transfer LiveVariables states, kill / dead info.
+ if (LV) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+
+ LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+ if (MO.isDef()) {
+ MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
+ if (MO.isDead())
+ LV->addVirtualRegisterDead(Reg, NewMI);
+ }
+ if (MO.isUse() && MO.isKill()) {
+ for (unsigned j = 0; j < 2; ++j) {
+ // Look at the two new MI's in reverse order.
+ MachineInstr *NewMI = NewMIs[j];
+ if (!NewMI->readsRegister(Reg))
+ continue;
+ LV->addVirtualRegisterKilled(Reg, NewMI);
+ if (VI.removeKill(MI))
+ VI.Kills.push_back(NewMI);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ MFI->insert(MBBI, NewMIs[1]);
+ MFI->insert(MBBI, NewMIs[0]);
+ return NewMIs[0];
+}
+
+// Branch analysis.
+bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastOpc == ARM::B || LastOpc == ARM::tB) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(LastInst->getOperand(1));
+ Cond.push_back(LastInst->getOperand(2));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with ARM::B/ARM::tB and an ARM::Bcc/ARM::tBcc, handle it.
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
+ (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(1));
+ Cond.push_back(SecondLastInst->getOperand(2));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) &&
+ (LastOpc == ARM::B || LastOpc == ARM::tB)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // ...likewise if it ends with a branch table followed by an unconditional
+ // branch. The branch folder can create these, and we must get rid of them for
+ // correctness of Thumb constant islands.
+ if ((SecondLastOpc == ARM::BR_JTr || SecondLastOpc==ARM::BR_JTm ||
+ SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr) &&
+ (LastOpc == ARM::B || LastOpc == ARM::tB)) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+unsigned ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B;
+ int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != BccOpc)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME: this should probably have a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B;
+ int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "ARM branch conditions have two components!");
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch?
+ BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB);
+ else
+ BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ return 1;
+ }
+
+ // Two-way conditional branch.
+ BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB);
+ return 2;
+}
+
+bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (!AFI->isThumbFunction()) {
+ if (DestRC == ARM::GPRRegisterClass) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg)));
+ return true;
+ }
+ } else {
+ if (DestRC == ARM::GPRRegisterClass) {
+ if (SrcRC == ARM::GPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg);
+ return true;
+ } else if (SrcRC == ARM::tGPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg);
+ return true;
+ }
+ } else if (DestRC == ARM::tGPRRegisterClass) {
+ if (SrcRC == ARM::GPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg);
+ return true;
+ } else if (SrcRC == ARM::tGPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
+ return true;
+ }
+ }
+ }
+ if (DestRC != SrcRC) {
+ // Not yet supported!
+ return false;
+ }
+
+
+ if (DestRC == ARM::SPRRegisterClass)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
+ .addReg(SrcReg));
+ else if (DestRC == ARM::DPRRegisterClass)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
+ .addReg(SrcReg));
+ else
+ return false;
+
+ return true;
+}
+
+void ARMInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == ARM::GPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert (!AFI->isThumbFunction());
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addReg(0).addImm(0));
+ } else if (RC == ARM::tGPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert (AFI->isThumbFunction());
+ BuildMI(MBB, I, DL, get(ARM::tSpill))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
+ } else if (RC == ARM::DPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0));
+ } else {
+ assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0));
+ }
+}
+
+void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ unsigned Opc = 0;
+ if (RC == ARM::GPRRegisterClass) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->isThumbFunction()) {
+ Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR;
+ MachineInstrBuilder MIB =
+ BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+ }
+ Opc = ARM::STR;
+ } else if (RC == ARM::DPRRegisterClass) {
+ Opc = ARM::FSTD;
+ } else {
+ assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+ Opc = ARM::FSTS;
+ }
+
+ MachineInstrBuilder MIB =
+ BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ AddDefaultPred(MIB);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+void ARMInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == ARM::GPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert (!AFI->isThumbFunction());
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
+ .addFrameIndex(FI).addReg(0).addImm(0));
+ } else if (RC == ARM::tGPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ assert (AFI->isThumbFunction());
+ BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
+ .addFrameIndex(FI).addImm(0);
+ } else if (RC == ARM::DPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg)
+ .addFrameIndex(FI).addImm(0));
+ } else {
+ assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg)
+ .addFrameIndex(FI).addImm(0));
+ }
+}
+
+void ARMInstrInfo::
+loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ unsigned Opc = 0;
+ if (RC == ARM::GPRRegisterClass) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->isThumbFunction()) {
+ Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR;
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+ }
+ Opc = ARM::LDR;
+ } else if (RC == ARM::DPRRegisterClass) {
+ Opc = ARM::FLDD;
+ } else {
+ assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+ Opc = ARM::FLDS;
+ }
+
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ AddDefaultPred(MIB);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+bool ARMInstrInfo::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->isThumbFunction() || CSI.empty())
+ return false;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ MIB.addReg(Reg, RegState::Kill);
+ }
+ return true;
+}
+
+bool ARMInstrInfo::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->isThumbFunction() || CSI.empty())
+ return false;
+
+ bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc());
+ MBB.insert(MI, PopMI);
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (Reg == ARM::LR) {
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
+ Reg = ARM::PC;
+ PopMI->setDesc(get(ARM::tPOP_RET));
+ MBB.erase(MI);
+ }
+ PopMI->addOperand(MachineOperand::CreateReg(Reg, true));
+ }
+ return true;
+}
+
+MachineInstr *ARMInstrInfo::
+foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops, int FI) const {
+ if (Ops.size() != 1) return NULL;
+
+ unsigned OpNum = Ops[0];
+ unsigned Opc = MI->getOpcode();
+ MachineInstr *NewMI = NULL;
+ switch (Opc) {
+ default: break;
+ case ARM::MOVr: {
+ if (MI->getOperand(4).getReg() == ARM::CPSR)
+ // If it is updating CPSR, then it cannot be folded.
+ break;
+ unsigned Pred = MI->getOperand(2).getImm();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+ }
+ break;
+ }
+ case ARM::tMOVr:
+ case ARM::tMOVlor2hir:
+ case ARM::tMOVhir2lor:
+ case ARM::tMOVhir2hir: {
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg))
+ // tSpill cannot take a high register operand.
+ break;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg))
+ // tRestore cannot target a high register operand.
+ break;
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addFrameIndex(FI).addImm(0);
+ }
+ break;
+ }
+ case ARM::FCPYS: {
+ unsigned Pred = MI->getOperand(2).getImm();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS))
+ .addReg(SrcReg).addFrameIndex(FI)
+ .addImm(0).addImm(Pred).addReg(PredReg);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS), DstReg)
+ .addFrameIndex(FI)
+ .addImm(0).addImm(Pred).addReg(PredReg);
+ }
+ break;
+ }
+ case ARM::FCPYD: {
+ unsigned Pred = MI->getOperand(2).getImm();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
+ }
+ break;
+ }
+ }
+
+ return NewMI;
+}
+
+bool ARMInstrInfo::
+canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ if (Ops.size() != 1) return false;
+
+ unsigned OpNum = Ops[0];
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default: break;
+ case ARM::MOVr:
+ // If it is updating CPSR, then it cannot be folded.
+ return MI->getOperand(4).getReg() != ARM::CPSR;
+ case ARM::tMOVr:
+ case ARM::tMOVlor2hir:
+ case ARM::tMOVhir2lor:
+ case ARM::tMOVhir2hir: {
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg))
+ // tSpill cannot take a high register operand.
+ return false;
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg))
+ // tRestore cannot target a high register operand.
+ return false;
+ }
+ return true;
+ }
+ case ARM::FCPYS:
+ case ARM::FCPYD:
+ return true;
+ }
+
+ return false;
+}
+
+bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case ARM::BX_RET: // Return.
+ case ARM::LDM_RET:
+ case ARM::tBX_RET:
+ case ARM::tBX_RET_vararg:
+ case ARM::tPOP_RET:
+ case ARM::B:
+ case ARM::tB: // Uncond branch.
+ case ARM::tBR_JTr:
+ case ARM::BR_JTr: // Jumptable branch.
+ case ARM::BR_JTm: // Jumptable branch through mem.
+ case ARM::BR_JTadd: // Jumptable branch add to pc.
+ return true;
+ default: return false;
+ }
+}
+
+bool ARMInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
+}
+
+bool ARMInstrInfo::isPredicated(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
+bool ARMInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::B || Opc == ARM::tB) {
+ MI->setDesc(get(Opc == ARM::B ? ARM::Bcc : ARM::tBcc));
+ MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
+ MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+ return true;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setImm(Pred[0].getImm());
+ MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+ return true;
+ }
+ return false;
+}
+
+bool ARMInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ if (Pred1.size() > 2 || Pred2.size() > 2)
+ return false;
+
+ ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
+ ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
+ if (CC1 == CC2)
+ return true;
+
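+ // Otherwise, CC1 subsumes CC2 if CC2 being true implies CC1 is true, e.g.
+ // HS (unsigned >=) subsumes HI (unsigned >).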
+ switch (CC1) {
+ default:
+ return false;
+ case ARMCC::AL:
+ return true;
+ case ARMCC::HS:
+ return CC2 == ARMCC::HI;
+ case ARMCC::LS:
+ return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
+ case ARMCC::GE:
+ return CC2 == ARMCC::GT;
+ case ARMCC::LE:
+ return CC2 == ARMCC::LT;
+ }
+}
+
+bool ARMInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
+ return false;
+
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+
+ return Found;
+}
+
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) DISABLE_INLINE;
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) {
+ return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo();
+
+ // Basic size info comes from the TSFlags field.
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned TSFlags = TID.TSFlags;
+
+ switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+ default: {
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName());
+ if (MI->isLabel())
+ return 0;
+ switch (MI->getOpcode()) {
+ default:
+ assert(0 && "Unknown or unset size field for instr!");
+ break;
+ case TargetInstrInfo::IMPLICIT_DEF:
+ case TargetInstrInfo::DECLARE:
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
+ return 0;
+ }
+ break;
+ }
+ case ARMII::Size8Bytes: return 8; // Arm instruction x 2.
+ case ARMII::Size4Bytes: return 4; // Arm instruction.
+ case ARMII::Size2Bytes: return 2; // Thumb instruction.
+ case ARMII::SizeSpecial: {
+ switch (MI->getOpcode()) {
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_setjmp: return 12;
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd:
+ case ARM::tBR_JTr: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+ // jumptable. The size is 4 + 4 * number of entries.
+ unsigned NumOps = TID.getNumOperands();
+ MachineOperand JTOP =
+ MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
+ unsigned JTI = JTOP.getIndex();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+ // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+ // aligned. The assembler / linker may add 2 bytes of padding just before
+ // the JT entries. The size does not include this padding; the
+ // constant islands pass does separate bookkeeping for it.
+ // FIXME: If we know the size of the function is less than (1 << 16) * 2
+ // bytes, we can use 16-bit entries instead. Then there won't be an
+ // alignment issue.
+ return getNumJTEntries(JT, JTI) * 4 +
+ (MI->getOpcode()==ARM::tBR_JTr ? 2 : 4);
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+ }
+ }
+ return 0; // Not reached
+}
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
new file mode 100644
index 000000000000..13ff3fea84be
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -0,0 +1,258 @@
+//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMINSTRUCTIONINFO_H
+#define ARMINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARM.h"
+
+namespace llvm {
+ class ARMSubtarget;
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction Flags.
+
+ //===------------------------------------------------------------------===//
+ // This four-bit field describes the addressing mode used.
+
+ AddrModeMask = 0xf,
+ AddrModeNone = 0,
+ AddrMode1 = 1,
+ AddrMode2 = 2,
+ AddrMode3 = 3,
+ AddrMode4 = 4,
+ AddrMode5 = 5,
+ AddrModeT1 = 6,
+ AddrModeT2 = 7,
+ AddrModeT4 = 8,
+ AddrModeTs = 9, // i8 * 4 for pc and sp relative data
+
+ // Size* - Flags to keep track of the size of an instruction.
+ SizeShift = 4,
+ SizeMask = 7 << SizeShift,
+ SizeSpecial = 1, // 0 byte pseudo or special case.
+ Size8Bytes = 2,
+ Size4Bytes = 3,
+ Size2Bytes = 4,
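+ // (The size is recovered from TSFlags as, e.g.,
+ //  (TSFlags & SizeMask) >> SizeShift; see GetInstSizeInBytes.)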
+
+ // IndexMode - Unindexed, pre-indexed, or post-indexed. Only valid for load
+ // and store ops.
+ IndexModeShift = 7,
+ IndexModeMask = 3 << IndexModeShift,
+ IndexModePre = 1,
+ IndexModePost = 2,
+
+ //===------------------------------------------------------------------===//
+ // Misc flags.
+
+ // UnaryDP - Indicates this is a unary data processing instruction, i.e.
+ // it doesn't have a Rn operand.
+ UnaryDP = 1 << 9,
+
+ //===------------------------------------------------------------------===//
+ // Instruction encoding formats.
+ //
+ FormShift = 10,
+ FormMask = 0x1f << FormShift,
+
+ // Pseudo instructions
+ Pseudo = 0 << FormShift,
+
+ // Multiply instructions
+ MulFrm = 1 << FormShift,
+
+ // Branch instructions
+ BrFrm = 2 << FormShift,
+ BrMiscFrm = 3 << FormShift,
+
+ // Data Processing instructions
+ DPFrm = 4 << FormShift,
+ DPSoRegFrm = 5 << FormShift,
+
+ // Load and Store
+ LdFrm = 6 << FormShift,
+ StFrm = 7 << FormShift,
+ LdMiscFrm = 8 << FormShift,
+ StMiscFrm = 9 << FormShift,
+ LdStMulFrm = 10 << FormShift,
+
+ // Miscellaneous arithmetic instructions
+ ArithMiscFrm = 11 << FormShift,
+
+ // Extend instructions
+ ExtFrm = 12 << FormShift,
+
+ // VFP formats
+ VFPUnaryFrm = 13 << FormShift,
+ VFPBinaryFrm = 14 << FormShift,
+ VFPConv1Frm = 15 << FormShift,
+ VFPConv2Frm = 16 << FormShift,
+ VFPConv3Frm = 17 << FormShift,
+ VFPConv4Frm = 18 << FormShift,
+ VFPConv5Frm = 19 << FormShift,
+ VFPLdStFrm = 20 << FormShift,
+ VFPLdStMulFrm = 21 << FormShift,
+ VFPMiscFrm = 22 << FormShift,
+
+ // Thumb format
+ ThumbFrm = 23 << FormShift,
+
+ //===------------------------------------------------------------------===//
+ // Field shifts - these shifts are used to set fields while generating
+ // machine instructions.
+ M_BitShift = 5,
+ ShiftImmShift = 5,
+ ShiftShift = 7,
+ N_BitShift = 7,
+ ImmHiShift = 8,
+ SoRotImmShift = 8,
+ RegRsShift = 8,
+ ExtRotImmShift = 10,
+ RegRdLoShift = 12,
+ RegRdShift = 12,
+ RegRdHiShift = 16,
+ RegRnShift = 16,
+ S_BitShift = 20,
+ W_BitShift = 21,
+ AM3_I_BitShift = 22,
+ D_BitShift = 22,
+ U_BitShift = 23,
+ P_BitShift = 24,
+ I_BitShift = 25,
+ CondShift = 28
+ };
+}
+
+class ARMInstrInfo : public TargetInstrInfoImpl {
+ const ARMRegisterInfo RI;
+public:
+ explicit ARMInstrInfo(const ARMSubtarget &STI);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegisterInfo. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const ARMRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, const MachineInstr *Orig) const;
+
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ virtual bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ // Predication support.
+ virtual bool isPredicated(const MachineInstr *MI) const;
+
+ ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
+ : ARMCC::AL;
+ }
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ /// GetInstSize - Returns the size of the specified MachineInstr.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
new file mode 100644
index 000000000000..680e77234db2
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -0,0 +1,1390 @@
+//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM specific DAG Nodes.
+//
+
+// Type profiles.
+def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;
+
+def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
+
+def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+
+def SDT_ARMCMov : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
+
+def SDT_ARMBrcond : SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+
+def SDT_ARMBrJT : SDTypeProfile<0, 3,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
+
+def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
+
+// Node definitions.
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
+
+def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
+ [SDNPInFlag]>;
+def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov,
+ [SDNPInFlag]>;
+
+def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
+def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
+ [SDNPHasChain]>;
+
+def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
+ [SDNPOutFlag]>;
+
+def ARMcmpNZ : SDNode<"ARMISD::CMPNZ", SDT_ARMCmp,
+ [SDNPOutFlag]>;
+
+def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+
+def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
+def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>;
+
+def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
+def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction Predicate Definitions.
+//
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
+def IsARM : Predicate<"!Subtarget->isThumb()">;
+
+//===----------------------------------------------------------------------===//
+// ARM Flag Definitions.
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+//===----------------------------------------------------------------------===//
+// ARM specific transformation functions and pattern fragments.
+//
+
+// so_imm_XFORM - Return a so_imm value packed into the format described for
+// so_imm def below.
+def so_imm_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(N->getZExtValue()),
+ MVT::i32);
+}]>;
+
+// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
+// so_imm_neg def below.
+def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(-(int)N->getZExtValue()),
+ MVT::i32);
+}]>;
+
+// so_imm_not_XFORM - Return a so_imm value packed into the format described for
+// so_imm_not def below.
+def so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(~(int)N->getZExtValue()),
+ MVT::i32);
+}]>;
+
+// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24.
+def rot_imm : PatLeaf<(i32 imm), [{
+ int32_t v = (int32_t)N->getZExtValue();
+ return v == 8 || v == 16 || v == 24;
+}]>;
+
+/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
+def imm1_15 : PatLeaf<(i32 imm), [{
+ return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16;
+}]>;
+
+/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
+def imm16_31 : PatLeaf<(i32 imm), [{
+ return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32;
+}]>;
+
+def so_imm_neg :
+ PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(-(int)N->getZExtValue()) != -1;
+ }], so_imm_neg_XFORM>;
+
+def so_imm_not :
+ PatLeaf<(imm), [{
+ return ARM_AM::getSOImmVal(~(int)N->getZExtValue()) != -1;
+ }], so_imm_not_XFORM>;
+
+// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
+def sext_16_node : PatLeaf<(i32 GPR:$a), [{
+ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
+
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// Branch target.
+def brtarget : Operand<OtherVT>;
+
+// A list of registers separated by commas. Used by load / store multiple.
+def reglist : Operand<i32> {
+ let PrintMethod = "printRegisterList";
+}
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+ let PrintMethod = "printCPInstOperand";
+}
+
+def jtblock_operand : Operand<i32> {
+ let PrintMethod = "printJTBlockOperand";
+}
+
+// Local PC labels.
+def pclabel : Operand<i32> {
+ let PrintMethod = "printPCLabel";
+}
+
+// shifter_operand operands: so_reg and so_imm.
+def so_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let PrintMethod = "printSORegOperand";
+ let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
+
+// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits. so_imm values are
+// represented in the imm field in the same 12-bit form that they are encoded
+// into so_imm instructions: the 8-bit immediate is the least significant bits
+// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
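+// For example, 0x00FF0000 (0xFF rotated right by 16) is a valid so_imm, while
+// a constant such as 0x00FF00FF is not and must be materialized in two pieces
+// (see so_imm2part below).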
+def so_imm : Operand<i32>,
+ PatLeaf<(imm),
+ [{ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; }],
+ so_imm_XFORM> {
+ let PrintMethod = "printSOImmOperand";
+}
+
+// Break so_imm values up into two pieces. This handles immediates with up to 16
+// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to
+// get the first/second pieces.
+def so_imm2part : Operand<i32>,
+ PatLeaf<(imm), [{
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
+ }]> {
+ let PrintMethod = "printSOImm2PartOperand";
+}
+
+def so_imm2part_1 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getSOImmTwoPartFirst((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+}]>;
+
+def so_imm2part_2 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getSOImmTwoPartSecond((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+}]>;
+
+
+// Define ARM specific addressing modes.
+
+// addrmode2 := reg +/- reg shop imm
+// addrmode2 := reg +/- imm12
+//
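+// e.g. "[r3, #-8]" or "[r3, r2, lsl #2]" in assembly syntax.
+//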
+def addrmode2 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+ let PrintMethod = "printAddrMode2Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def am2offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2Offset", []> {
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode3 := reg +/- reg
+// addrmode3 := reg +/- imm8
+//
+def addrmode3 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+ let PrintMethod = "printAddrMode3Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+def am3offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode3Offset", []> {
+ let PrintMethod = "printAddrMode3OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode4 := reg, <mode|W>
+//
+def addrmode4 : Operand<i32>,
+ ComplexPattern<i32, 2, "", []> {
+ let PrintMethod = "printAddrMode4Operand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode5 := reg +/- imm8*4
+//
+def addrmode5 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode5", []> {
+ let PrintMethod = "printAddrMode5Operand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
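+
+// Note: the addrmode5 offset is an imm8 scaled by 4, so the reachable range
+// is -1020 to +1020 in steps of 4.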
+
+// addrmodepc := pc + reg
+//
+def addrmodepc : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModePC", []> {
+ let PrintMethod = "printAddrModePCOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// ARM predicate operand. Defaults to 14 = always (AL). The second part is the
+// CC register, whose default is 0 (no register).
+def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
+ (ops (i32 14), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+}
+
+// Condition code result for instructions whose 's' bit is set, e.g. subs.
+//
+def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+ let PrintMethod = "printSBitModifierOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction flags. These need to match ARMInstrInfo.h.
+//
+
+// Addressing mode.
+class AddrMode<bits<4> val> {
+ bits<4> Value = val;
+}
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrModeT1 : AddrMode<6>;
+def AddrModeT2 : AddrMode<7>;
+def AddrModeT4 : AddrMode<8>;
+def AddrModeTs : AddrMode<9>;
+
+// Instruction size.
+class SizeFlagVal<bits<3> val> {
+ bits<3> Value = val;
+}
+def SizeInvalid : SizeFlagVal<0>; // Unset.
+def SizeSpecial : SizeFlagVal<1>; // Pseudo or special.
+def Size8Bytes : SizeFlagVal<2>;
+def Size4Bytes : SizeFlagVal<3>;
+def Size2Bytes : SizeFlagVal<4>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+ bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
+
+//===----------------------------------------------------------------------===//
+
+include "ARMInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Multiclass helpers...
+//
+
+/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// binop that produces a value.
+multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
+ opc, " $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
+ def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
+ opc, " $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+ def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
+ opc, " $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+}
+
+/// ASI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+let Defs = [CPSR] in {
+multiclass ASI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
+ opc, "s $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
+ def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
+ opc, "s $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+ def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
+ opc, "s $dst, $a, $b",
+ [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+}
+}
+
+/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
+/// an explicit result; it only implicitly sets CPSR.
+let Defs = [CPSR] in {
+multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm,
+ opc, " $a, $b",
+ [(opnode GPR:$a, so_imm:$b)]>;
+ def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm,
+ opc, " $a, $b",
+ [(opnode GPR:$a, GPR:$b)]>;
+ def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
+ opc, " $a, $b",
+ [(opnode GPR:$a, so_reg:$b)]>;
+}
+}
+
+/// AI_unary_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
+multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$Src),
+ opc, " $dst, $Src",
+ [(set GPR:$dst, (opnode GPR:$Src))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{19-16} = 0b1111;
+ }
+ def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$Src, i32imm:$rot),
+ opc, " $dst, $Src, ror $rot",
+ [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{19-16} = 0b1111;
+ }
+}
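+
+// For example (illustrative): the r_rot form of SXTB rotates the source right
+// by 8, 16, or 24 before extending, so a rotate of 16 sign-extends bits
+// [23:16] of the source register.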
+
+/// AI_bin_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
+ opc, " $dst, $LHS, $RHS",
+ [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+ Requires<[IsARM, HasV6]>;
+ def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ opc, " $dst, $LHS, $RHS, ror $rot",
+ [(set GPR:$dst, (opnode GPR:$LHS,
+ (rotr GPR:$RHS, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]>;
+}
+
+/// AsXI1_bin_c_irs - Same as AsI1_bin_irs, but without the predicate operand;
+/// these read the carry bit and can optionally set CPSR via the cc_out operand.
+let Uses = [CPSR] in {
+multiclass AsXI1_bin_c_irs<bits<4> opcod, string opc, PatFrag opnode> {
+ def ri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b, cc_out:$s),
+ DPFrm, !strconcat(opc, "${s} $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
+ def rr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b, cc_out:$s),
+ DPFrm, !strconcat(opc, "${s} $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+ def rs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b, cc_out:$s),
+ DPSoRegFrm, !strconcat(opc, "${s} $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in
+/// the function. The first operand is the ID# for this instruction, the second
+/// is the index into the MachineConstantPool, and the third is the size in
+/// bytes of this constant pool entry.
+let isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size),
+ "${instid:label} ${cpidx:cpentry}", []>;
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKUP :
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p),
+ "@ ADJCALLSTACKUP $amt1",
+ [(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
+
+def ADJCALLSTACKDOWN :
+PseudoInst<(outs), (ins i32imm:$amt, pred:$p),
+ "@ ADJCALLSTACKDOWN $amt",
+ [(ARMcallseq_start timm:$amt)]>;
+}
+
+def DWARF_LOC :
+PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file),
+ ".loc $file, $line, $col",
+ [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
+
+
+// Address computation and loads and stores in PIC mode.
+let isNotDuplicable = 1 in {
+def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
+ Pseudo, "$cp:\n\tadd$p $dst, pc, $a",
+ [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+
+let AddedComplexity = 10 in {
+let canFoldAsLoad = 1 in
+def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tldr$p $dst, $addr",
+ [(set GPR:$dst, (load addrmodepc:$addr))]>;
+
+def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tldr${p}h $dst, $addr",
+ [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tldr${p}b $dst, $addr",
+ [(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>;
+
+def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tldr${p}sh $dst, $addr",
+ [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>;
+
+def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tldr${p}sb $dst, $addr",
+ [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>;
+}
+let AddedComplexity = 10 in {
+def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tstr$p $src, $addr",
+ [(store GPR:$src, addrmodepc:$addr)]>;
+
+def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tstr${p}h $src, $addr",
+ [(truncstorei16 GPR:$src, addrmodepc:$addr)]>;
+
+def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+ Pseudo, "${addr:label}:\n\tstr${p}b $src, $addr",
+ [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+}
+} // isNotDuplicable = 1
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1 in
+ def BX_RET : AI<(outs), (ins), BrMiscFrm, "bx", " lr", [(ARMretflag)]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
+}
+
+// FIXME: remove when we have a way of marking an MI with these properties.
+// FIXME: $dst1 should be a def. But the extra ops must be at the end of the
+// operand list.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1 in
+ def LDM_RET : AXI4ld<(outs),
+ (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops),
+ LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1",
+ []>;
+
+let isCall = 1,
+ Defs = [R0, R1, R2, R3, R12, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in {
+ def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
+ "bl ${func:call}",
+ [(ARMcall tglobaladdr:$func)]>;
+
+ def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
+ "bl", " ${func:call}",
+ [(ARMcall_pred tglobaladdr:$func)]>;
+
+ // ARMv5T and above
+ def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ "blx $func",
+ [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T]> {
+ let Inst{7-4} = 0b0011;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
+ }
+
+ let Uses = [LR] in {
+ // ARMv4T
+ def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops),
+ "mov lr, pc\n\tbx $func",
+ [(ARMcall_nolink GPR:$func)]>;
+ }
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ // B is "predicable" since it can be xformed into a Bcc.
+ let isBarrier = 1 in {
+ let isPredicable = 1 in
+ def B : ABXI<0b1010, (outs), (ins brtarget:$target), "b $target",
+ [(br bb:$target)]>;
+
+ let isNotDuplicable = 1, isIndirectBranch = 1 in {
+ def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
+ "mov pc, $target \n$jt",
+ [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
+ let Inst{20} = 0; // S Bit
+ let Inst{24-21} = 0b1101;
+ let Inst{27-26} = {0,0};
+ }
+ def BR_JTm : JTI<(outs),
+ (ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id),
+ "ldr pc, $target \n$jt",
+ [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
+ imm:$id)]> {
+ let Inst{20} = 1; // L bit
+ let Inst{21} = 0; // W bit
+ let Inst{22} = 0; // B bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-26} = {0,1};
+ }
+ def BR_JTadd : JTI<(outs),
+ (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
+ "add pc, $target, $idx \n$jt",
+ [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
+ imm:$id)]> {
+ let Inst{20} = 0; // S bit
+ let Inst{24-21} = 0b0100;
+ let Inst{27-26} = {0,0};
+ }
+ } // isNotDuplicable = 1, isIndirectBranch = 1
+ } // isBarrier = 1
+
+ // FIXME: should be able to write a pattern for ARMBrcond, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def Bcc : ABI<0b1010, (outs), (ins brtarget:$target),
+ "b", " $target",
+ [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+// Load
+let canFoldAsLoad = 1 in
+def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
+ "ldr", " $dst, $addr",
+ [(set GPR:$dst, (load addrmode2:$addr))]>;
+
+// Special LDR for loads from non-pc-relative constpools.
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
+ "ldr", " $dst, $addr", []>;
+
+// Loads with zero extension
+def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "h $dst, $addr",
+ [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
+
+def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
+ "ldr", "b $dst, $addr",
+ [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
+
+// Loads with sign extension
+def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "sh $dst, $addr",
+ [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "sb $dst, $addr",
+ [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
+
+let mayLoad = 1 in {
+// Load doubleword
+def LDRD : AI3ldd<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "d $dst, $addr",
+ []>, Requires<[IsARM, HasV5T]>;
+
+// Indexed loads
+def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb),
+ (ins addrmode2:$addr), LdFrm,
+ "ldr", " $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am2offset:$offset), LdFrm,
+ "ldr", " $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb),
+ (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "h $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm,
+ "ldr", "h $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb),
+ (ins addrmode2:$addr), LdFrm,
+ "ldr", "b $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base,am2offset:$offset), LdFrm,
+ "ldr", "b $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb),
+ (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "sh $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm,
+ "ldr", "sh $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
+ (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "sb $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm,
+ "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>;
+}
+
+// Store
+def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm,
+ "str", " $src, $addr",
+ [(store GPR:$src, addrmode2:$addr)]>;
+
+// Stores with truncate
+def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
+ "str", "h $src, $addr",
+ [(truncstorei16 GPR:$src, addrmode3:$addr)]>;
+
+def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm,
+ "str", "b $src, $addr",
+ [(truncstorei8 GPR:$src, addrmode2:$addr)]>;
+
+// Store doubleword
+let mayStore = 1 in
+def STRD : AI3std<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
+ "str", "d $src, $addr",
+ []>, Requires<[IsARM, HasV5T]>;
+
+// Indexed stores
+def STR_PRE : AI2stwpr<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base, am2offset:$offset), StFrm,
+ "str", " $src, [$base, $offset]!", "$base = $base_wb",
+ [(set GPR:$base_wb,
+ (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>;
+
+def STR_POST : AI2stwpo<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm,
+ "str", " $src, [$base], $offset", "$base = $base_wb",
+ [(set GPR:$base_wb,
+ (post_store GPR:$src, GPR:$base, am2offset:$offset))]>;
+
+def STRH_PRE : AI3sthpr<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm,
+ "str", "h $src, [$base, $offset]!", "$base = $base_wb",
+ [(set GPR:$base_wb,
+ (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
+
+def STRH_POST: AI3sthpo<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm,
+ "str", "h $src, [$base], $offset", "$base = $base_wb",
+ [(set GPR:$base_wb, (post_truncsti16 GPR:$src,
+ GPR:$base, am3offset:$offset))]>;
+
+def STRB_PRE : AI2stbpr<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm,
+ "str", "b $src, [$base, $offset]!", "$base = $base_wb",
+ [(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
+ GPR:$base, am2offset:$offset))]>;
+
+def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm,
+ "str", "b $src, [$base], $offset", "$base = $base_wb",
+ [(set GPR:$base_wb, (post_truncsti8 GPR:$src,
+ GPR:$base, am2offset:$offset))]>;
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+// FIXME: $dst1 should be a def.
+let mayLoad = 1 in
+def LDM : AXI4ld<(outs),
+ (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops),
+ LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1",
+ []>;
+
+let mayStore = 1 in
+def STM : AXI4st<(outs),
+ (ins addrmode4:$addr, pred:$p, reglist:$src1, variable_ops),
+ LdStMulFrm, "stm${p}${addr:submode} $addr, $src1",
+ []>;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+
+def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm,
+ "mov", " $dst, $src", []>, UnaryDP;
+def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
+ "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm,
+ "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP;
+
+def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
+ "mov", " $dst, $src, rrx",
+ [(set GPR:$dst, (ARMrrx GPR:$src))]>, UnaryDP;
+
+// These aren't really mov instructions, but we have to define them this way
+// due to flag operands.
+
+let Defs = [CPSR] in {
+def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
+ "mov", "s $dst, $src, lsr #1",
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP;
+def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
+ "mov", "s $dst, $src, asr #1",
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP;
+}
+
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
+
+// Sign extenders
+
+defm SXTB : AI_unary_rrot<0b01101010,
+ "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm SXTH : AI_unary_rrot<0b01101011,
+ "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
+
+defm SXTAB : AI_bin_rrot<0b01101010,
+ "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+defm SXTAH : AI_bin_rrot<0b01101011,
+ "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+
+// TODO: SXT(A){B|H}16
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+defm UXTB : AI_unary_rrot<0b01101110,
+ "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm UXTH : AI_unary_rrot<0b01101111,
+ "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm UXTB16 : AI_unary_rrot<0b01101100,
+ "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+def : ARMV6Pat<(and (shl GPR:$Src, 8), 0xFF00FF),
+ (UXTB16r_rot GPR:$Src, 24)>;
+def : ARMV6Pat<(and (srl GPR:$Src, 8), 0xFF00FF),
+ (UXTB16r_rot GPR:$Src, 8)>;
+
+defm UXTAB : AI_bin_rrot<0b01101110, "uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+defm UXTAH : AI_bin_rrot<0b01101111, "uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+}
+
+// This isn't safe in general: the add is performed on two 16-bit units, not
+// as a single 32-bit add.
+//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>;
+
+// TODO: UXT(A){B|H}16
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm ADD : AsI1_bin_irs<0b0100, "add",
+ BinOpFrag<(add node:$LHS, node:$RHS)>>;
+defm SUB : AsI1_bin_irs<0b0010, "sub",
+ BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+// ADD and SUB with 's' bit set.
+defm ADDS : ASI1_bin_s_irs<0b0100, "add",
+ BinOpFrag<(addc node:$LHS, node:$RHS)>>;
+defm SUBS : ASI1_bin_s_irs<0b0010, "sub",
+ BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+// FIXME: Do not allow ADC / SBC to be predicated for now.
+defm ADC : AsXI1_bin_c_irs<0b0101, "adc",
+ BinOpFrag<(adde node:$LHS, node:$RHS)>>;
+defm SBC : AsXI1_bin_c_irs<0b0110, "sbc",
+ BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+
+// These don't define reg/reg forms, because they are handled above.
+def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
+ "rsb", " $dst, $a, $b",
+ [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]>;
+
+def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
+ "rsb", " $dst, $a, $b",
+ [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]>;
+
+// RSB with 's' bit set.
+let Defs = [CPSR] in {
+def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
+ "rsb", "s $dst, $a, $b",
+ [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]>;
+def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
+ "rsb", "s $dst, $a, $b",
+ [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]>;
+}
+
+// FIXME: Do not allow RSC to be predicated for now. But they can set CPSR.
+let Uses = [CPSR] in {
+def RSCri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b, cc_out:$s),
+ DPFrm, "rsc${s} $dst, $a, $b",
+ [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>;
+def RSCrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b, cc_out:$s),
+ DPSoRegFrm, "rsc${s} $dst, $a, $b",
+ [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>;
+}
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
+ (SUBri GPR:$src, so_imm_neg:$imm)>;
+
+//def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
+// (SUBSri GPR:$src, so_imm_neg:$imm)>;
+//def : ARMPat<(adde GPR:$src, so_imm_neg:$imm),
+// (SBCri GPR:$src, so_imm_neg:$imm)>;
+
+// Note: These are implemented in C++ code, because they have to generate
+// ADD/SUBrs instructions, which use a complex pattern that an xform function
+// cannot produce.
+// (mul X, 2^n+1) -> (add (X << n), X)
+// (mul X, 2^n-1) -> (rsb X, (X << n))
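+// For example (illustrative): a multiply by 9 (2^3+1) becomes
+//   add r0, r1, r1, lsl #3
+// and a multiply by 7 (2^3-1) becomes
+//   rsb r0, r1, r1, lsl #3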
+
+
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
+
+defm AND : AsI1_bin_irs<0b0000, "and",
+ BinOpFrag<(and node:$LHS, node:$RHS)>>;
+defm ORR : AsI1_bin_irs<0b1100, "orr",
+ BinOpFrag<(or node:$LHS, node:$RHS)>>;
+defm EOR : AsI1_bin_irs<0b0001, "eor",
+ BinOpFrag<(xor node:$LHS, node:$RHS)>>;
+defm BIC : AsI1_bin_irs<0b1110, "bic",
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+
+def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm,
+ "mvn", " $dst, $src",
+ [(set GPR:$dst, (not GPR:$src))]>, UnaryDP;
+def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
+ "mvn", " $dst, $src",
+ [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
+ "mvn", " $dst, $imm",
+ [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP;
+
+def : ARMPat<(and GPR:$src, so_imm_not:$imm),
+ (BICri GPR:$src, so_imm_not:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
+
+def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ "mul", " $dst, $a, $b",
+ [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
+
+def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+ "mla", " $dst, $a, $b, $c",
+ [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
+
+// Extra precision multiplies with low / high results
+def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
+ (ins GPR:$a, GPR:$b),
+ "smull", " $ldst, $hdst, $a, $b", []>;
+
+def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
+ (ins GPR:$a, GPR:$b),
+ "umull", " $ldst, $hdst, $a, $b", []>;
+
+// Multiply + accumulate
+def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
+ (ins GPR:$a, GPR:$b),
+ "smlal", " $ldst, $hdst, $a, $b", []>;
+
+def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst),
+ (ins GPR:$a, GPR:$b),
+ "umlal", " $ldst, $hdst, $a, $b", []>;
+
+def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
+ (ins GPR:$a, GPR:$b),
+ "umaal", " $ldst, $hdst, $a, $b", []>,
+ Requires<[IsARM, HasV6]>;
+
+// Most significant word multiply
+def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ "smmul", " $dst, $a, $b",
+ [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{15-12} = 0b1111;
+}
+
+def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+ "smmla", " $dst, $a, $b, $c",
+ [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b0001;
+}
+
+
+def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+ "smmls", " $dst, $a, $b, $c",
+ [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b1101;
+}
+
+multiclass AI_smul<string opc, PatFrag opnode> {
+ def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ !strconcat(opc, "bb"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
+ (sext_inreg GPR:$b, i16)))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 0;
+ }
+
+ def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ !strconcat(opc, "bt"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
+ (sra GPR:$b, 16)))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 1;
+ }
+
+ def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ !strconcat(opc, "tb"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sra GPR:$a, 16),
+ (sext_inreg GPR:$b, i16)))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 0;
+ }
+
+ def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ !strconcat(opc, "tt"), " $dst, $a, $b",
+ [(set GPR:$dst, (opnode (sra GPR:$a, 16),
+ (sra GPR:$b, 16)))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 1;
+ }
+
+ def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ !strconcat(opc, "wb"), " $dst, $a, $b",
+ [(set GPR:$dst, (sra (opnode GPR:$a,
+ (sext_inreg GPR:$b, i16)), 16))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 0;
+ }
+
+ def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ !strconcat(opc, "wt"), " $dst, $a, $b",
+ [(set GPR:$dst, (sra (opnode GPR:$a,
+ (sra GPR:$b, 16)), 16))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 1;
+ }
+}
+
+
+multiclass AI_smla<string opc, PatFrag opnode> {
+ def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc,
+ (opnode (sext_inreg GPR:$a, i16),
+ (sext_inreg GPR:$b, i16))))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 0;
+ }
+
+ def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
+ (sra GPR:$b, 16))))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 1;
+ }
+
+ def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+ (sext_inreg GPR:$b, i16))))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 0;
+ }
+
+ def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+ (sra GPR:$b, 16))))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 1;
+ let Inst{6} = 1;
+ }
+
+ def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+ (sext_inreg GPR:$b, i16)), 16)))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 0;
+ }
+
+ def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
+ [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+ (sra GPR:$b, 16)), 16)))]>,
+ Requires<[IsARM, HasV5TE]> {
+ let Inst{5} = 0;
+ let Inst{6} = 1;
+ }
+}
+
+defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// TODO: Halfword multiple accumulate long: SMLAL<x><y>
+// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src),
+ "clz", " $dst, $src",
+ [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{11-8} = 0b1111;
+ let Inst{19-16} = 0b1111;
+}
+
+def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src),
+ "rev", " $dst, $src",
+ [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b0011;
+ let Inst{11-8} = 0b1111;
+ let Inst{19-16} = 0b1111;
+}
+
+def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src),
+ "rev16", " $dst, $src",
+ [(set GPR:$dst,
+ (or (and (srl GPR:$src, 8), 0xFF),
+ (or (and (shl GPR:$src, 8), 0xFF00),
+ (or (and (srl GPR:$src, 8), 0xFF0000),
+ (and (shl GPR:$src, 8), 0xFF000000)))))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b1011;
+ let Inst{11-8} = 0b1111;
+ let Inst{19-16} = 0b1111;
+}
+
+def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src),
+ "revsh", " $dst, $src",
+ [(set GPR:$dst,
+ (sext_inreg
+ (or (srl (and GPR:$src, 0xFF00), 8),
+ (shl GPR:$src, 8)), i16))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b1011;
+ let Inst{11-8} = 0b1111;
+ let Inst{19-16} = 0b1111;
+}
+
+def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
+ (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
+ "pkhbt", " $dst, $src1, $src2, LSL $shamt",
+ [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
+ (and (shl GPR:$src2, (i32 imm:$shamt)),
+ 0xFFFF0000)))]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{6-4} = 0b001;
+}
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
+ (PKHBT GPR:$src1, GPR:$src2, 0)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
+ (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
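+
+// The second pattern above is valid because a left shift by 16-31 already
+// clears the low 16 bits, so the explicit "and ..., 0xFFFF0000" is redundant.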
+
+
+def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
+ (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
+ "pkhtb", " $dst, $src1, $src2, ASR $shamt",
+ [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
+ (and (sra GPR:$src2, imm16_31:$shamt),
+ 0xFFFF)))]>, Requires<[IsARM, HasV6]> {
+ let Inst{6-4} = 0b101;
+}
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here; that encoding is PKHBT instead.
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, 16)),
+ (PKHTB GPR:$src1, GPR:$src2, 16)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
+ (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
+ (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+defm CMP : AI1_cmp_irs<0b1010, "cmp",
+ BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+defm CMN : AI1_cmp_irs<0b1011, "cmn",
+ BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+
+// Note that TST/TEQ don't set all the same flags that CMP does!
+defm TST : AI1_cmp_irs<0b1000, "tst",
+ BinOpFrag<(ARMcmpNZ (and node:$LHS, node:$RHS), 0)>>;
+defm TEQ : AI1_cmp_irs<0b1001, "teq",
+ BinOpFrag<(ARMcmpNZ (xor node:$LHS, node:$RHS), 0)>>;
+
+defm CMPnz : AI1_cmp_irs<0b1010, "cmp",
+ BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>;
+defm CMNnz : AI1_cmp_irs<0b1011, "cmn",
+ BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>;
+
+def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpNZ GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
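+
+// For example (illustrative): (ARMcmp r0, -1) cannot use CMP directly, since
+// -1 is not a valid so_imm, but it can be selected as "cmn r0, #1".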
+
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">, UnaryDP;
+
+def MOVCCs : AI1<0b1101, (outs GPR:$dst),
+ (ins GPR:$false, so_reg:$true), DPSoRegFrm,
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">, UnaryDP;
+
+def MOVCCi : AI1<0b1101, (outs GPR:$dst),
+ (ins GPR:$false, so_imm:$true), DPFrm,
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">, UnaryDP;
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo,
+ !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add$p $dst, pc, #PCRELV${:uid}")),
+ []>;
+
+def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, i32imm:$id, pred:$p),
+ Pseudo,
+ !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add$p $dst, pc, #PCRELV${:uid}")),
+ []>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR] in {
+ def TPsoft : ABXI<0b1011, (outs), (ins),
+ "bl __aeabi_read_tp",
+ [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is a three instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+let Defs =
+ [ R0, R1, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR,
+ D0, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15 ] in {
+ def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src),
+ AddrModeNone, SizeSpecial, IndexModeNone, Pseudo,
+ "add r0, pc, #4\n\t"
+ "str r0, [$src, #+4]\n\t"
+ "mov r0, #0 @ eh_setjmp", "",
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Large immediate handling.
+
+// Two piece so_imms.
+let isReMaterializable = 1 in
+def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), Pseudo,
+ "mov", " $dst, $src",
+ [(set GPR:$dst, so_imm2part:$src)]>;
+
+def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS),
+ (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
+ (so_imm2part_2 imm:$RHS))>;
+def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS),
+ (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
+ (so_imm2part_2 imm:$RHS))>;
+
+// TODO: add,sub,and, 3-instr forms?
+
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>;
+def : ARMPat<(extloadi8 addrmode2:$addr), (LDRB addrmode2:$addr)>;
+def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
+
+def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
+def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
+
+// smul* and smla*
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra (shl GPR:$b, 16), 16)),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16)),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, 16)),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16)),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, 16), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16),
+ (SMULWB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), 16),
+ (SMULWB GPR:$a, GPR:$b)>;
+
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, 16), 16),
+ (sra (shl GPR:$b, 16), 16))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, (sra GPR:$b, 16))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16))),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, 16), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16)),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, sext_16_node:$b), 16)),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
+
+include "ARMInstrThumb.td"
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
+
+include "ARMInstrVFP.td"
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
new file mode 100644
index 000000000000..ffb83a8b4d36
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -0,0 +1,562 @@
+//===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Thumb specific DAG Nodes.
+//
+
+def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+def imm_comp_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+
+/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
+def imm0_7 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() < 8;
+}]>;
+def imm0_7_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)-N->getZExtValue() < 8;
+}], imm_neg_XFORM>;
+
+def imm0_255 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() < 256;
+}]>;
+def imm0_255_comp : PatLeaf<(i32 imm), [{
+ return ~((uint32_t)N->getZExtValue()) < 256;
+}]>;
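+
+// For example (illustrative): 0xFFFFFF00 matches imm0_255_comp because its
+// complement is 0xFF; the pattern at the end of this file materializes it as
+// a tMOVi8 of 255 followed by a tMVN.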
+
+def imm8_255 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() >= 8 && (uint32_t)N->getZExtValue() < 256;
+}]>;
+def imm8_255_neg : PatLeaf<(i32 imm), [{
+ unsigned Val = -N->getZExtValue();
+ return Val >= 8 && Val < 256;
+}], imm_neg_XFORM>;
+
+// Break imm's up into two pieces: an immediate + a left shift.
+// This uses thumb_immshifted to match and thumb_immshifted_val and
+// thumb_immshifted_shamt to get the val/shift pieces.
+def thumb_immshifted : PatLeaf<(imm), [{
+ return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue());
+}]>;
+
+def thumb_immshifted_val : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
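+
+// For example (illustrative): 0x0000A500 matches thumb_immshifted with
+// val = 0xA5 and shamt = 8; the pattern at the end of this file selects it as
+// a tMOVi8 of 0xA5 followed by a tLSLri of 8.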
+
+// Define Thumb specific addressing modes.
+
+// t_addrmode_rr := reg + reg
+//
+def t_addrmode_rr : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
+// t_addrmode_s4 := reg + reg
+// reg + imm5 * 4
+//
+def t_addrmode_s4 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectThumbAddrModeS4", []> {
+ let PrintMethod = "printThumbAddrModeS4Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg);
+}
+
+// t_addrmode_s2 := reg + reg
+// reg + imm5 * 2
+//
+def t_addrmode_s2 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectThumbAddrModeS2", []> {
+ let PrintMethod = "printThumbAddrModeS2Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg);
+}
+
+// t_addrmode_s1 := reg + reg
+// reg + imm5
+//
+def t_addrmode_s1 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectThumbAddrModeS1", []> {
+ let PrintMethod = "printThumbAddrModeS1Operand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg);
+}
+
+// t_addrmode_sp := sp + imm8 * 4
+//
+def t_addrmode_sp : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+ let PrintMethod = "printThumbAddrModeSPOperand";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+}
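+
+// Note: the SP-relative immediate is an imm8 scaled by 4, so the reachable
+// offsets are 0 to 1020 in steps of 4.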
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+let Defs = [SP], Uses = [SP] in {
+def tADJCALLSTACKUP :
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "@ tADJCALLSTACKUP $amt1",
+ [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb]>;
+
+def tADJCALLSTACKDOWN :
+PseudoInst<(outs), (ins i32imm:$amt),
+ "@ tADJCALLSTACKDOWN $amt",
+ [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb]>;
+}
+
+let isNotDuplicable = 1 in
+def tPICADD : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp),
+ "$cp:\n\tadd $dst, pc",
+ [(set tGPR:$dst, (ARMpic_add tGPR:$lhs, imm:$cp))]>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1 in {
+ def tBX_RET : TI<(outs), (ins), "bx lr", [(ARMretflag)]>;
+ // Alternative return instruction used by vararg functions.
+ def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), "bx $target", []>;
+}
+
+// FIXME: remove when we have a way of marking an MI with these properties.
+let isReturn = 1, isTerminator = 1 in
+def tPOP_RET : TI<(outs reglist:$dst1, variable_ops), (ins),
+ "pop $dst1", []>;
+
+let isCall = 1,
+ Defs = [R0, R1, R2, R3, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7] in {
+ def tBL : TIx2<(outs), (ins i32imm:$func, variable_ops),
+ "bl ${func:call}",
+ [(ARMtcall tglobaladdr:$func)]>;
+ // ARMv5T and above
+ def tBLXi : TIx2<(outs), (ins i32imm:$func, variable_ops),
+ "blx ${func:call}",
+ [(ARMcall tglobaladdr:$func)]>, Requires<[HasV5T]>;
+ def tBLXr : TI<(outs), (ins tGPR:$func, variable_ops),
+ "blx $func",
+ [(ARMtcall tGPR:$func)]>, Requires<[HasV5T]>;
+ // ARMv4T
+ def tBX : TIx2<(outs), (ins tGPR:$func, variable_ops),
+ "cpy lr, pc\n\tbx $func",
+ [(ARMcall_nolink tGPR:$func)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ let isBarrier = 1 in {
+ let isPredicable = 1 in
+ def tB : TI<(outs), (ins brtarget:$target), "b $target",
+ [(br bb:$target)]>;
+
+ // Far jump
+ def tBfar : TIx2<(outs), (ins brtarget:$target), "bl $target\t@ far jump",[]>;
+
+ def tBR_JTr : TJTI<(outs),
+ (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
+ "cpy pc, $target \n\t.align\t2\n$jt",
+ [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>;
+ }
+}
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let isBranch = 1, isTerminator = 1 in
+ def tBcc : TI<(outs), (ins brtarget:$target, pred:$cc), "b$cc $target",
+ [/*(ARMbrcond bb:$target, imm:$cc)*/]>;
+
+//===----------------------------------------------------------------------===//
+// Load Store Instructions.
+//
+
+let canFoldAsLoad = 1 in
+def tLDR : TI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr),
+ "ldr $dst, $addr",
+ [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>;
+
+def tLDRB : TI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr),
+ "ldrb $dst, $addr",
+ [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>;
+
+def tLDRH : TI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr),
+ "ldrh $dst, $addr",
+ [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>;
+
+def tLDRSB : TI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ "ldrsb $dst, $addr",
+ [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
+
+def tLDRSH : TI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+ "ldrsh $dst, $addr",
+ [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
+
+let canFoldAsLoad = 1 in
+def tLDRspi : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr),
+ "ldr $dst, $addr",
+ [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>;
+
+// Special instruction for restore. It cannot clobber the condition register
+// when it's expanded by eliminateCallFramePseudoInstr().
+let canFoldAsLoad = 1, mayLoad = 1 in
+def tRestore : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr),
+ "ldr $dst, $addr", []>;
+
+// Load tconstpool
+let canFoldAsLoad = 1 in
+def tLDRpci : TIs<(outs tGPR:$dst), (ins i32imm:$addr),
+ "ldr $dst, $addr",
+ [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>;
+
+// Special LDR for loads from non-pc-relative constpools.
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+def tLDRcp : TIs<(outs tGPR:$dst), (ins i32imm:$addr),
+ "ldr $dst, $addr", []>;
+
+def tSTR : TI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr),
+ "str $src, $addr",
+ [(store tGPR:$src, t_addrmode_s4:$addr)]>;
+
+def tSTRB : TI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr),
+ "strb $src, $addr",
+ [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>;
+
+def tSTRH : TI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr),
+ "strh $src, $addr",
+ [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>;
+
+def tSTRspi : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr),
+ "str $src, $addr",
+ [(store tGPR:$src, t_addrmode_sp:$addr)]>;
+
+let mayStore = 1 in {
+// Special instruction for spill. It cannot clobber the condition register
+// when it's expanded by eliminateCallFramePseudoInstr().
+def tSpill : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr),
+ "str $src, $addr", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+// TODO: A7-44: LDMIA - load multiple
+
+let mayLoad = 1 in
+def tPOP : TI<(outs reglist:$dst1, variable_ops), (ins),
+ "pop $dst1", []>;
+
+let mayStore = 1 in
+def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops),
+ "push $src1", []>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+// Add with carry
+def tADC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "adc $dst, $rhs",
+ [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>;
+
+def tADDS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "add $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (addc tGPR:$lhs, tGPR:$rhs))]>;
+
+
+def tADDi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "add $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>;
+
+def tADDi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "add $dst, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>;
+
+def tADDrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "add $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>;
+
+def tADDhirr : TIt<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+ "add $dst, $rhs @ addhirr", []>;
+
+def tADDrPCi : TI<(outs tGPR:$dst), (ins i32imm:$rhs),
+ "add $dst, pc, $rhs * 4", []>;
+
+def tADDrSPi : TI<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs),
+ "add $dst, $sp, $rhs * 4 @ addrspi", []>;
+
+def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ "add $dst, $rhs * 4", []>;
+
+def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "and $dst, $rhs",
+ [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>;
+
+def tASRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "asr $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (sra tGPR:$lhs, imm:$rhs))]>;
+
+def tASRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "asr $dst, $rhs",
+ [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>;
+
+def tBIC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "bic $dst, $rhs",
+ [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>;
+
+
+def tCMN : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+ "cmn $lhs, $rhs",
+ [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
+
+def tCMPi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>;
+
+def tCMPr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>;
+
+def tTST : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+ "tst $lhs, $rhs",
+ [(ARMcmpNZ (and tGPR:$lhs, tGPR:$rhs), 0)]>;
+
+def tCMNNZ : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+ "cmn $lhs, $rhs",
+ [(ARMcmpNZ tGPR:$lhs, (ineg tGPR:$rhs))]>;
+
+def tCMPNZi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmpNZ tGPR:$lhs, imm0_255:$rhs)]>;
+
+def tCMPNZr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmpNZ tGPR:$lhs, tGPR:$rhs)]>;
+
+// TODO: A7-37: CMP(3) - cmp hi regs
+
+def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "eor $dst, $rhs",
+ [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>;
+
+def tLSLri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "lsl $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (shl tGPR:$lhs, imm:$rhs))]>;
+
+def tLSLrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "lsl $dst, $rhs",
+ [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>;
+
+def tLSRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "lsr $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (srl tGPR:$lhs, imm:$rhs))]>;
+
+def tLSRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "lsr $dst, $rhs",
+ [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>;
+
+// FIXME: This is not rematerializable because mov changes the condition code.
+def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src),
+ "mov $dst, $src",
+ [(set tGPR:$dst, imm0_255:$src)]>;
+
+// TODO: A7-73: MOV(2) - mov setting flag.
+
+
+// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
+// which is MOV(3). This also supports high registers.
+def tMOVr : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "cpy $dst, $src", []>;
+def tMOVhir2lor : TI<(outs tGPR:$dst), (ins GPR:$src),
+ "cpy $dst, $src\t@ hir2lor", []>;
+def tMOVlor2hir : TI<(outs GPR:$dst), (ins tGPR:$src),
+ "cpy $dst, $src\t@ lor2hir", []>;
+def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src),
+ "cpy $dst, $src\t@ hir2hir", []>;
+
+def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "mul $dst, $rhs",
+ [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;
+
+def tMVN : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "mvn $dst, $src",
+ [(set tGPR:$dst, (not tGPR:$src))]>;
+
+def tNEG : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "neg $dst, $src",
+ [(set tGPR:$dst, (ineg tGPR:$src))]>;
+
+def tORR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "orr $dst, $rhs",
+ [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>;
+
+
+def tREV : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "rev $dst, $src",
+ [(set tGPR:$dst, (bswap tGPR:$src))]>,
+ Requires<[IsThumb, HasV6]>;
+
+def tREV16 : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "rev16 $dst, $src",
+ [(set tGPR:$dst,
+ (or (and (srl tGPR:$src, 8), 0xFF),
+ (or (and (shl tGPR:$src, 8), 0xFF00),
+ (or (and (srl tGPR:$src, 8), 0xFF0000),
+ (and (shl tGPR:$src, 8), 0xFF000000)))))]>,
+ Requires<[IsThumb, HasV6]>;
+
+def tREVSH : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "revsh $dst, $src",
+ [(set tGPR:$dst,
+ (sext_inreg
+ (or (srl (and tGPR:$src, 0xFFFF), 8),
+ (shl tGPR:$src, 8)), i16))]>,
+ Requires<[IsThumb, HasV6]>;
+
+def tROR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "ror $dst, $rhs",
+ [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>;
+
+
+// Subtract with carry
+def tSBC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "sbc $dst, $rhs",
+ [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>;
+
+def tSUBS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "sub $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (subc tGPR:$lhs, tGPR:$rhs))]>;
+
+
+// TODO: A7-96: STMIA - store multiple.
+
+def tSUBi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "sub $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>;
+
+def tSUBi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "sub $dst, $rhs",
+ [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>;
+
+def tSUBrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ "sub $dst, $lhs, $rhs",
+ [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>;
+
+def tSUBspi : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+ "sub $dst, $rhs * 4", []>;
+
+def tSXTB : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "sxtb $dst, $src",
+ [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>,
+ Requires<[IsThumb, HasV6]>;
+def tSXTH : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "sxth $dst, $src",
+ [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>,
+ Requires<[IsThumb, HasV6]>;
+
+
+def tUXTB : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "uxtb $dst, $src",
+ [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>,
+ Requires<[IsThumb, HasV6]>;
+def tUXTH : TI<(outs tGPR:$dst), (ins tGPR:$src),
+ "uxth $dst, $src",
+ [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>,
+ Requires<[IsThumb, HasV6]>;
+
+
+// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG
+// operation. Expanded by the scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler.
+ def tMOVCCr :
+ PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
+ "@ tMOVCCr $cc",
+ [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
+
+// tLEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def tLEApcrel : TIx2<(outs tGPR:$dst), (ins i32imm:$label),
+ !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+ "${:private}PCRELL${:uid}+4))\n"),
+ !strconcat("\tmov $dst, #PCRELV${:uid}\n",
+ "${:private}PCRELL${:uid}:\n\tadd $dst, pc")),
+ []>;
+
+def tLEApcrelJT : TIx2<(outs tGPR:$dst), (ins i32imm:$label, i32imm:$id),
+ !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
+ "${:private}PCRELL${:uid}+4))\n"),
+ !strconcat("\tmov $dst, #PCRELV${:uid}\n",
+ "${:private}PCRELL${:uid}:\n\tadd $dst, pc")),
+ []>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+let isCall = 1,
+ Defs = [R0, LR] in {
+ def tTPsoft : TIx2<(outs), (ins),
+ "bl __aeabi_read_tp",
+ [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// ConstantPool, GlobalAddress
+def : ThumbPat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+def : ThumbPat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
+
+// JumpTable
+def : ThumbPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (tLEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// Direct calls
+def : ThumbPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>;
+def : ThumbV5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
+
+// Indirect calls to ARM routines
+def : ThumbV5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>;
+
+// zextload i1 -> zextload i8
+def : ThumbPat<(zextloadi1 t_addrmode_s1:$addr),
+ (tLDRB t_addrmode_s1:$addr)>;
+
+// extload -> zextload
+def : ThumbPat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>;
+def : ThumbPat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>;
+def : ThumbPat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>;
+
+// Large immediate handling.
+
+// Two piece imms.
+def : ThumbPat<(i32 thumb_immshifted:$src),
+ (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)),
+ (thumb_immshifted_shamt imm:$src))>;
+
+def : ThumbPat<(i32 imm0_255_comp:$src),
+ (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
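+// Illustrative expansions of the two patterns above (example values only):
+//   0x2A00     (0x2A << 8)  ->  mov rD, #42  ; lsl rD, rD, #8
+//   0xFFFFFF5A (~0xA5)      ->  mov rD, #165 ; mvn rD, rD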
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
new file mode 100644
index 000000000000..168fb45f11ea
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -0,0 +1,12 @@
+//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb2 instruction set.
+//
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
new file mode 100644
index 000000000000..0247dafd07df
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -0,0 +1,398 @@
+//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM VFP instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+def SDT_FTOI :
+SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDT_ITOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDT_CMPFP0 :
+SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_FMDRR :
+SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
+def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
+def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
+def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
+def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>;
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+let canFoldAsLoad = 1 in {
+def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
+ "fldd", " $dst, $addr",
+ [(set DPR:$dst, (load addrmode5:$addr))]>;
+
+def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr),
+ "flds", " $dst, $addr",
+ [(set SPR:$dst, (load addrmode5:$addr))]>;
+} // canFoldAsLoad
+
+def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr),
+ "fstd", " $src, $addr",
+ [(store DPR:$src, addrmode5:$addr)]>;
+
+def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
+ "fsts", " $src, $addr",
+ [(store SPR:$src, addrmode5:$addr)]>;
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+let mayLoad = 1 in {
+def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1,
+ variable_ops),
+ "fldm${addr:submode}d${p} ${addr:base}, $dst1",
+ []> {
+ let Inst{20} = 1;
+}
+
+def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1,
+ variable_ops),
+ "fldm${addr:submode}s${p} ${addr:base}, $dst1",
+ []> {
+ let Inst{20} = 1;
+}
+}
+
+let mayStore = 1 in {
+def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1,
+ variable_ops),
+ "fstm${addr:submode}d${p} ${addr:base}, $src1",
+ []> {
+ let Inst{20} = 0;
+}
+
+def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1,
+ variable_ops),
+ "fstm${addr:submode}s${p} ${addr:base}, $src1",
+ []> {
+ let Inst{20} = 0;
+}
+} // mayStore
+
+// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
+
+//===----------------------------------------------------------------------===//
+// FP Binary Operations.
+//
+
+def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ "faddd", " $dst, $a, $b",
+ [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
+
+def FADDS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ "fadds", " $dst, $a, $b",
+ [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
+
+// These are encoded as unary instructions.
+def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b),
+ "fcmped", " $a, $b",
+ [(arm_cmpfp DPR:$a, DPR:$b)]>;
+
+def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b),
+ "fcmpes", " $a, $b",
+ [(arm_cmpfp SPR:$a, SPR:$b)]>;
+
+def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ "fdivd", " $dst, $a, $b",
+ [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>;
+
+def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ "fdivs", " $dst, $a, $b",
+ [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
+
+def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ "fmuld", " $dst, $a, $b",
+ [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
+
+def FMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ "fmuls", " $dst, $a, $b",
+ [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
+
+def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ "fnmuld", " $dst, $a, $b",
+ [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> {
+ let Inst{6} = 1;
+}
+
+def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ "fnmuls", " $dst, $a, $b",
+ [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> {
+ let Inst{6} = 1;
+}
+
+// Match reassociated forms only if not sign dependent rounding.
+def : Pat<(fmul (fneg DPR:$a), DPR:$b),
+ (FNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+def : Pat<(fmul (fneg SPR:$a), SPR:$b),
+ (FNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+
+
+def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ "fsubd", " $dst, $a, $b",
+ [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> {
+ let Inst{6} = 1;
+}
+
+def FSUBS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ "fsubs", " $dst, $a, $b",
+ [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> {
+ let Inst{6} = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FP Unary Operations.
+//
+
+def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a),
+ "fabsd", " $dst, $a",
+ [(set DPR:$dst, (fabs DPR:$a))]>;
+
+def FABSS : ASuI<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+ "fabss", " $dst, $a",
+ [(set SPR:$dst, (fabs SPR:$a))]>;
+
+def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a),
+ "fcmpezd", " $a",
+ [(arm_cmpfp0 DPR:$a)]>;
+
+def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a),
+ "fcmpezs", " $a",
+ [(arm_cmpfp0 SPR:$a)]>;
+
+def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a),
+ "fcvtds", " $dst, $a",
+ [(set DPR:$dst, (fextend SPR:$a))]>;
+
+// Special case encoding: bits 11-8 are 0b1011.
+def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
+ "fcvtsd", " $dst, $a",
+ [(set SPR:$dst, (fround DPR:$a))]> {
+ let Inst{27-23} = 0b11101;
+ let Inst{21-16} = 0b110111;
+ let Inst{11-8} = 0b1011;
+ let Inst{7-4} = 0b1100;
+}
+
+def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
+ "fcpyd", " $dst, $a", []>;
+
+def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+ "fcpys", " $dst, $a", []>;
+
+def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
+ "fnegd", " $dst, $a",
+ [(set DPR:$dst, (fneg DPR:$a))]>;
+
+def FNEGS : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+ "fnegs", " $dst, $a",
+ [(set SPR:$dst, (fneg SPR:$a))]>;
+
+def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a),
+ "fsqrtd", " $dst, $a",
+ [(set DPR:$dst, (fsqrt DPR:$a))]>;
+
+def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+ "fsqrts", " $dst, $a",
+ [(set SPR:$dst, (fsqrt SPR:$a))]>;
+
+//===----------------------------------------------------------------------===//
+// FP <-> GPR Copies. Int <-> FP Conversions.
+//
+
+def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
+ "fmrs", " $dst, $src",
+ [(set GPR:$dst, (bitconvert SPR:$src))]>;
+
+def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
+ "fmsr", " $dst, $src",
+ [(set SPR:$dst, (bitconvert GPR:$src))]>;
+
+def FMRRD : AVConv3I<0b11000101, 0b1011,
+ (outs GPR:$dst1, GPR:$dst2), (ins DPR:$src),
+ "fmrrd", " $dst1, $dst2, $src",
+ [/* FIXME: Can't write pattern for multiple result instr*/]>;
+
+// FMDHR: GPR -> SPR
+// FMDLR: GPR -> SPR
+
+def FMDRR : AVConv5I<0b11000100, 0b1011,
+ (outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
+ "fmdrr", " $dst, $src1, $src2",
+ [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
+
+// FMRDH: SPR -> GPR
+// FMRDL: SPR -> GPR
+// FMRRS: SPR -> GPR
+// FMRX : SPR system reg -> GPR
+
+// FMSRR: GPR -> SPR
+
+// FMXR: GPR -> VFP system reg
+
+
+// Int to FP:
+
+def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
+ "fsitod", " $dst, $a",
+ [(set DPR:$dst, (arm_sitof SPR:$a))]> {
+ let Inst{7} = 1;
+}
+
+def FSITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a),
+ "fsitos", " $dst, $a",
+ [(set SPR:$dst, (arm_sitof SPR:$a))]> {
+ let Inst{7} = 1;
+}
+
+def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
+ "fuitod", " $dst, $a",
+ [(set DPR:$dst, (arm_uitof SPR:$a))]>;
+
+def FUITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a),
+ "fuitos", " $dst, $a",
+ [(set SPR:$dst, (arm_uitof SPR:$a))]>;
+
+// FP to Int:
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+
+def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011,
+ (outs SPR:$dst), (ins DPR:$a),
+ "ftosizd", " $dst, $a",
+ [(set SPR:$dst, (arm_ftosi DPR:$a))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def FTOSIZS : AVConv1I<0b11101011, 0b1101, 0b1010,
+ (outs SPR:$dst), (ins SPR:$a),
+ "ftosizs", " $dst, $a",
+ [(set SPR:$dst, (arm_ftosi SPR:$a))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011,
+ (outs SPR:$dst), (ins DPR:$a),
+ "ftouizd", " $dst, $a",
+ [(set SPR:$dst, (arm_ftoui DPR:$a))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+def FTOUIZS : AVConv1I<0b11101011, 0b1100, 0b1010,
+ (outs SPR:$dst), (ins SPR:$a),
+ "ftouizs", " $dst, $a",
+ [(set SPR:$dst, (arm_ftoui SPR:$a))]> {
+ let Inst{7} = 1; // Z bit
+}
+
+//===----------------------------------------------------------------------===//
+// FP FMA Operations.
+//
+
+def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ "fmacd", " $dst, $a, $b",
+ [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ "fmacs", " $dst, $a, $b",
+ [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ "fmscd", " $dst, $a, $b",
+ [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ "fmscs", " $dst, $a, $b",
+ [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ "fnmacd", " $dst, $a, $b",
+ [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst"> {
+ let Inst{6} = 1;
+}
+
+def FNMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ "fnmacs", " $dst, $a, $b",
+ [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst"> {
+ let Inst{6} = 1;
+}
+
+def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ "fnmscd", " $dst, $a, $b",
+ [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst"> {
+ let Inst{6} = 1;
+}
+
+def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ "fnmscs", " $dst, $a, $b",
+ [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst"> {
+ let Inst{6} = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// FP Conditional moves.
+//
+
+def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100,
+ (outs DPR:$dst), (ins DPR:$false, DPR:$true),
+ "fcpyd", " $dst, $true",
+ [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100,
+ (outs SPR:$dst), (ins SPR:$false, SPR:$true),
+ "fcpys", " $dst, $true",
+ [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100,
+ (outs DPR:$dst), (ins DPR:$false, DPR:$true),
+ "fnegd", " $dst, $true",
+ [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
+ (outs SPR:$dst), (ins SPR:$false, SPR:$true),
+ "fnegs", " $dst, $true",
+ [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+
+//===----------------------------------------------------------------------===//
+// Misc.
+//
+
+let Defs = [CPSR] in
+def FMSTAT : AI<(outs), (ins), VFPMiscFrm, "fmstat", "", [(arm_fmstat)]> {
+ let Inst{27-20} = 0b11101111;
+ let Inst{19-16} = 0b0001;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{4} = 1;
+}
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
new file mode 100644
index 000000000000..e551c41936f8
--- /dev/null
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -0,0 +1,298 @@
+//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARMJITInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/Memory.h"
+#include <cstdlib>
+using namespace llvm;
+
+void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ abort();
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us (we need
+// to preserve more context and manipulate the stack directly). Instead, we
+// write our own wrapper, which does things our way, so we have complete
+// control over register saving and restoring.
+extern "C" {
+#if defined(__arm__)
+ void ARMCompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl " ASMPREFIX "ARMCompilationCallback\n"
+ ASMPREFIX "ARMCompilationCallback:\n"
+ // Save caller saved registers since they may contain stuff
+ // for the real target function right now. We have to act as if this
+ // whole compilation callback doesn't exist as far as the caller is
+ // concerned, so we can't just preserve the callee saved regs.
+ "stmdb sp!, {r0, r1, r2, r3, lr}\n"
+#ifndef __SOFTFP__
+ "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ // The LR contains the address of the stub function on entry.
+ // pass it as the argument to the C part of the callback
+ "mov r0, lr\n"
+ "sub sp, sp, #4\n"
+ // Call the C portion of the callback
+ "bl " ASMPREFIX "ARMCompilationCallbackC\n"
+ "add sp, sp, #4\n"
+ // Restoring the LR to the return address of the function that invoked
+ // the stub and de-allocating the stack space for it requires us to
+ // swap the two saved LR values on the stack, as they're backwards
+ // for what we need since the pop instruction has a pre-determined
+ // order for the registers.
+ // +--------+
+ // 0 | LR | Original return address
+ // +--------+
+ // 1 | LR | Stub address (start of stub)
+ // 2-5 | R3..R0 | Saved registers (we need to preserve all regs)
+ // 6-20 | D0..D7 | Saved VFP registers
+ // +--------+
+ //
+#ifndef __SOFTFP__
+ // Restore VFP caller-saved registers.
+ "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+#endif
+ //
+ // We need to exchange the values in slots 0 and 1 so we can
+ // return to the address in slot 1 with the address in slot 0
+ // restored to the LR.
+ "ldr r0, [sp,#20]\n"
+ "ldr r1, [sp,#16]\n"
+ "str r1, [sp,#20]\n"
+ "str r0, [sp,#16]\n"
+ // Return to the (newly modified) stub to invoke the real function.
+ // The above twiddling of the saved return addresses allows us to
+ // deallocate everything, including the LR the stub saved, all in one
+ // pop instruction.
+ "ldmia sp!, {r0, r1, r2, r3, lr, pc}\n"
+ );
+#else // Not an ARM host
+ void ARMCompilationCallback() {
+ assert(0 && "Cannot call ARMCompilationCallback() on a non-ARM arch!\n");
+ abort();
+ }
+#endif
+}
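+// In short (a sketch of the flow implemented above and in emitFunctionStub
+// below): a lazy stub pushes LR, points LR back at the start of the stub and
+// branches to ARMCompilationCallback; the wrapper saves the caller-saved
+// registers and passes the stub address to ARMCompilationCallbackC, which
+// JIT-compiles the real function and rewrites the stub so later calls branch
+// straight to the compiled code.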
+
+/// ARMCompilationCallbackC - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call.
+/// This function must locate the start of the stub or call site and pass
+/// it into the JIT compiler function.
+extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) {
+ // Get the address of the compiled code for this function.
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call. We're replacing the first two instructions of the
+ // stub with:
+ // ldr pc, [pc,#-4]
+ // <addr>
+ if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) {
+ cerr << "ERROR: Unable to mark stub writable\n";
+ abort();
+ }
+ *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4]
+ *(intptr_t *)(StubAddr+4) = NewVal;
+ if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) {
+ cerr << "ERROR: Unable to mark stub executable\n";
+ abort();
+ }
+}
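+// Illustrative before/after of the rewrite above, assuming the 16-byte lazy
+// stub emitted by emitFunctionStub below (addresses are hypothetical):
+//   before:  push {lr} ; sub lr, pc, #12 ; ldr pc, [pc, #-4] ; <callback addr>
+//   after:   ldr pc, [pc, #-4] ; <addr of compiled code> ; <remaining words unchanged>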
+
+TargetJITInfo::LazyResolverFn
+ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return ARMCompilationCallback;
+}
+
+void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
+ JITCodeEmitter &JCE) {
+ JCE.startGVStub(GV, 4, 4);
+ JCE.emitWordLE((intptr_t)Ptr);
+ void *PtrAddr = JCE.finishGVStub(GV);
+ addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
+ return PtrAddr;
+}
+
+void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ // If the target function's address is already known, emit a stub that
+ // branches directly to it rather than going through the compilation callback.
+ if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
+ // Branch to the corresponding function addr.
+ if (IsPIC) {
+ // In PIC mode the stub is 16 bytes in size and 4-byte aligned.
+ intptr_t LazyPtr = getIndirectSymAddr(Fn);
+ if (!LazyPtr) {
+ // In PIC mode, the function stub is loading a lazy-ptr.
+ LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE);
+ if (F)
+ DOUT << "JIT: Indirect symbol emitted at [" << LazyPtr << "] for GV '"
+ << F->getName() << "'\n";
+ else
+ DOUT << "JIT: Stub emitted at [" << LazyPtr
+ << "] for external function at '" << Fn << "'\n";
+ }
+ JCE.startGVStub(F, 16, 4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordLE(0xe59fc004); // ldr pc, [pc, #+4]
+ JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
+ JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
+ JCE.emitWordLE(LazyPtr - (Addr+4+8)); // func - (L_func$scv+8)
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
+ } else {
+ // The stub is 8 bytes in size and 4-byte aligned.
+ JCE.startGVStub(F, 8, 4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ JCE.emitWordLE((intptr_t)Fn); // addr of function
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 8);
+ }
+ } else {
+ // The compilation callback will overwrite the first two words of this
+ // stub with indirect branch instructions targeting the compiled code.
+ // This stub sets the return address to restart the stub, so that
+ // the new branch will be invoked when we come back.
+ //
+ // Branch and link to the compilation callback.
+ // The stub is 16 bytes in size and 4-byte aligned.
+ JCE.startGVStub(F, 16, 4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ // Save LR so the callback can determine which stub called it.
+ // The compilation callback is responsible for popping this prior
+ // to returning.
+ JCE.emitWordLE(0xe92d4000); // push {lr}
+ // Set the return address to go back to the start of this stub.
+ JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12
+ // Invoke the compilation callback.
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ // The address of the compilation callback.
+ JCE.emitWordLE((intptr_t)ARMCompilationCallback);
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
+ }
+
+ return JCE.finishGVStub(F);
+}
+
+intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
+ ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType();
+ switch (RT) {
+ default:
+ return (intptr_t)(MR->getResultPointer());
+ case ARM::reloc_arm_pic_jt:
+ // Destination address - jump table base.
+ return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal();
+ case ARM::reloc_arm_jt_base:
+ // Jump table base address.
+ return getJumpTableBaseAddr(MR->getJumpTableIndex());
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ // Constant pool entry address.
+ return getConstantPoolEntryAddr(MR->getConstantPoolIndex());
+ case ARM::reloc_arm_machine_cp_entry: {
+ ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal();
+ assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) &&
+ "Can't handle this machine constant pool entry yet!");
+ intptr_t Addr = (intptr_t)(MR->getResultPointer());
+ Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment();
+ return Addr;
+ }
+ }
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = resolveRelocDestAddr(MR);
+ switch ((ARM::RelocationType)MR->getRelocationType()) {
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ case ARM::reloc_arm_relative: {
+ // It is necessary to calculate the correct PC relative value. We
+ // subtract the base addr from the target addr to form a byte offset.
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ // If the result is positive, set bit U(23) to 1.
+ if (ResultPtr >= 0)
+ *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift;
+ else {
+ // Otherwise, obtain the absolute value and set bit U(23) to 0.
+ *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift);
+ ResultPtr = - ResultPtr;
+ }
+ // Set the immed value calculated.
+ // VFP immediate offset is multiplied by 4.
+ if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
+ ResultPtr = ResultPtr >> 2;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ // Set register Rn to PC.
+ *((intptr_t*)RelocPos) |=
+ ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ break;
+ }
+ case ARM::reloc_arm_pic_jt:
+ case ARM::reloc_arm_machine_cp_entry:
+ case ARM::reloc_arm_absolute: {
+ // These addresses have already been resolved.
+ *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_branch: {
+ // It is necessary to calculate the correct value of the signed_immed_24
+ // field. We subtract the base addr from the target addr to form a
+ // byte offset, which must be inside the range -33554432 to +33554428.
+ // Then, we set the signed_immed_24 field of the instruction to bits
+ // [25:2] of the byte offset. More details in ARM-ARM p. A4-11.
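+ // Illustrative example with assumed addresses: a branch at 0x8000 whose
+ // target resolves to 0x8040 gives ResultPtr = 0x8040 - 0x8000 - 8 = 0x38,
+ // and (0x38 & 0x03FFFFFC) >> 2 = 0xE is OR'd into the low 24 bits.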
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2;
+ assert(ResultPtr >= -33554432 && ResultPtr <= 33554428);
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ case ARM::reloc_arm_jt_base: {
+ // JT base - (instruction addr + 8)
+ ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
+ *((intptr_t*)RelocPos) |= ResultPtr;
+ break;
+ }
+ }
+ }
+}
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
new file mode 100644
index 000000000000..7dfeed8b7bf3
--- /dev/null
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -0,0 +1,178 @@
+//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMJITINFO_H
+#define ARMJITINFO_H
+
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+ class ARMTargetMachine;
+
+ class ARMJITInfo : public TargetJITInfo {
+ // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding
+ // CONSTPOOL_ENTRY addresses.
+ SmallVector<intptr_t, 16> ConstPoolId2AddrMap;
+
+ // JumpTableId2AddrMap - A map from inline jumptable ids to the
+ // corresponding inline jump table bases.
+ SmallVector<intptr_t, 16> JumpTableId2AddrMap;
+
+ // PCLabelMap - A map from PC labels to addresses.
+ DenseMap<unsigned, intptr_t> PCLabelMap;
+
+ // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol)
+ // addresses to their indirect symbol addresses.
+ DenseMap<void*, intptr_t> Sym2IndirectSymMap;
+
+ // IsPIC - True if the relocation model is PIC. This is used to determine
+ // how to codegen function stubs.
+ bool IsPIC;
+
+ public:
+ explicit ARMJITInfo() : IsPIC(false) { useGOT = false; }
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+ /// to emit an indirect symbol which contains the address of the specified
+ /// ptr.
+ virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE);
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// hasCustomConstantPool - Allows a target to specify that constant
+ /// pool address resolution is handled by the target.
+ virtual bool hasCustomConstantPool() const { return true; }
+
+ /// hasCustomJumpTables - Allows a target to specify that jumptables
+ /// are emitted by the target.
+ virtual bool hasCustomJumpTables() const { return true; }
+
+ /// allocateSeparateGVMemory - If true, globals should be placed in
+ /// separately allocated heap memory rather than in the same
+ /// code memory allocated by JITCodeEmitter.
+ virtual bool allocateSeparateGVMemory() const {
+#ifdef __APPLE__
+ return true;
+#else
+ return false;
+#endif
+ }
+
+ /// Initialize - Initialize internal state for the function being JITted.
+ /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize
+ /// jump table ids to jump table bases map; remember if codegen relocation
+ /// model is PIC.
+ void Initialize(const MachineFunction &MF, bool isPIC) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ ConstPoolId2AddrMap.resize(AFI->getNumConstPoolEntries());
+ JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
+ IsPIC = isPIC;
+ }
+
+ /// getConstantPoolEntryAddr - The ARM target puts all constant
+ /// pool entries into constant islands. This returns the address of the
+ /// constant pool entry of the specified index.
+ intptr_t getConstantPoolEntryAddr(unsigned CPI) const {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ return ConstPoolId2AddrMap[CPI];
+ }
+
+ /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address
+ /// where its associated value is stored. When relocations are processed,
+ /// this value will be used to resolve references to the constant.
+ void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) {
+ assert(CPI < ConstPoolId2AddrMap.size());
+ ConstPoolId2AddrMap[CPI] = Addr;
+ }
+
+ /// getJumpTableBaseAddr - The ARM target inlines all jump tables within the
+ /// text section of the function. This returns the address of the base of
+ /// the jump table of the specified index.
+ intptr_t getJumpTableBaseAddr(unsigned JTI) const {
+ assert(JTI < JumpTableId2AddrMap.size());
+ return JumpTableId2AddrMap[JTI];
+ }
+
+ /// addJumpTableBaseAddr - Map a jump table index to the address where
+ /// the corresponding inline jump table is emitted. When relocations are
+ /// processed, this value will be used to resolve references to the
+ /// jump table.
+ void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) {
+ assert(JTI < JumpTableId2AddrMap.size());
+ JumpTableId2AddrMap[JTI] = Addr;
+ }
+
+ /// getPCLabelAddr - Retrieve the address of the PC label of the specified id.
+ intptr_t getPCLabelAddr(unsigned Id) const {
+ DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id);
+ assert(I != PCLabelMap.end());
+ return I->second;
+ }
+
+ /// addPCLabelAddr - Remember the address of the specified PC label.
+ void addPCLabelAddr(unsigned Id, intptr_t Addr) {
+ PCLabelMap.insert(std::make_pair(Id, Addr));
+ }
+
+ /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the
+ /// specified symbol located at address. Returns 0 if the indirect symbol
+ /// has not been emitted.
+ intptr_t getIndirectSymAddr(void *Addr) const {
+ DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr);
+ if (I != Sym2IndirectSymMap.end())
+ return I->second;
+ return 0;
+ }
+
+ /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to
+ /// its indirect symbol address.
+ void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) {
+ Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr));
+ }
+
+ private:
+ /// resolveRelocDestAddr - Resolve the resulting address of the relocation
+ /// if it's not already resolved. Constant pool entries must be resolved by
+ /// the ARM target.
+ intptr_t resolveRelocDestAddr(MachineRelocation *MR) const;
+ };
+}
+
+#endif
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
new file mode 100644
index 000000000000..047552f627db
--- /dev/null
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -0,0 +1,778 @@
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs load / store related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-ldst-opt"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+STATISTIC(NumLDMGened , "Number of ldm instructions generated");
+STATISTIC(NumSTMGened , "Number of stm instructions generated");
+STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
+STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+
+namespace {
+ struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
+ static char ID;
+ ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ ARMFunctionInfo *AFI;
+ RegScavenger *RS;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM load / store optimization pass";
+ }
+
+ private:
+ struct MemOpQueueEntry {
+ int Offset;
+ unsigned Position;
+ MachineBasicBlock::iterator MBBI;
+ bool Merged;
+ MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
+ : Offset(o), Position(p), MBBI(i), Merged(false) {};
+ };
+ typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
+ typedef MemOpQueue::iterator MemOpQueueIter;
+
+ SmallVector<MachineBasicBlock::iterator, 4>
+ MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+ int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps);
+
+ void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+ bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
+ bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ };
+ char ARMLoadStoreOpt::ID = 0;
+}
+
+/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
+/// optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
+ return new ARMLoadStoreOpt();
+}
+
+static int getLoadStoreMultipleOpcode(int Opcode) {
+ switch (Opcode) {
+ case ARM::LDR:
+ NumLDMGened++;
+ return ARM::LDM;
+ case ARM::STR:
+ NumSTMGened++;
+ return ARM::STM;
+ case ARM::FLDS:
+ NumFLDMGened++;
+ return ARM::FLDMS;
+ case ARM::FSTS:
+ NumFSTMGened++;
+ return ARM::FSTMS;
+ case ARM::FLDD:
+ NumFLDMGened++;
+ return ARM::FLDMD;
+ case ARM::FSTD:
+ NumFSTMGened++;
+ return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+/// mergeOps - Create and insert an LDM or STM with Base as the base register
+/// and registers in Regs as the register operands that would be loaded / stored.
+/// It returns true if the transformation is done.
+static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, bool BaseKill, int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
+ SmallVector<std::pair<unsigned, bool>, 8> &Regs,
+ const TargetInstrInfo *TII) {
+ // FIXME would it be better to take a DL from one of the loads arbitrarily?
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Only a single register to load / store. Don't bother.
+ unsigned NumRegs = Regs.size();
+ if (NumRegs <= 1)
+ return false;
+
+ ARM_AM::AMSubMode Mode = ARM_AM::ia;
+ bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
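+ // Illustrative submode selection for an AM4 op covering 3 registers
+ // (NumRegs == 3): Offset 4 -> ib, 0 -> ia (the default), -8 -> da,
+ // -12 -> db; any other non-zero offset materializes a new base below.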
+ if (isAM4 && Offset == 4)
+ Mode = ARM_AM::ib;
+ else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
+ Mode = ARM_AM::da;
+ else if (isAM4 && Offset == -4 * (int)NumRegs)
+ Mode = ARM_AM::db;
+ else if (Offset != 0) {
+ // If the starting offset isn't zero, insert an MI to materialize a new base.
+ // But only do so if it is cost effective, i.e. merging more than two
+ // loads / stores.
+ if (NumRegs <= 2)
+ return false;
+
+ unsigned NewBase;
+ if (Opcode == ARM::LDR)
+ // If it is a load, just use one of the destination registers as the
+ // new base.
+ NewBase = Regs[NumRegs-1].first;
+ else {
+ // Use the scratch register as the new base.
+ NewBase = Scratch;
+ if (NewBase == 0)
+ return false;
+ }
+ int BaseOpc = ARM::ADDri;
+ if (Offset < 0) {
+ BaseOpc = ARM::SUBri;
+ Offset = - Offset;
+ }
+ int ImmedOffset = ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset == -1)
+ return false; // Probably not worth it then.
+
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(ImmedOffset)
+ .addImm(Pred).addReg(PredReg).addReg(0);
+ Base = NewBase;
+ BaseKill = true; // New base is always killed right after its use.
+ }
+
+ bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
+ bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ Opcode = getLoadStoreMultipleOpcode(Opcode);
+ MachineInstrBuilder MIB = (isAM4)
+ ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
+ : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
+ .addImm(Pred).addReg(PredReg);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
+ | getKillRegState(Regs[i].second));
+
+ return true;
+}
+
+/// MergeLDR_STR - Merge a number of load / store instructions into one or more
+/// load / store multiple instructions.
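+/// For example (an illustrative case):
+///   ldr r0, [r4] ; ldr r1, [r4, #4] ; ldr r2, [r4, #8]
+/// can become
+///   ldmia r4, {r0, r1, r2}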
+SmallVector<MachineBasicBlock::iterator, 4>
+ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
+ unsigned Base, int Opcode, unsigned Size,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch, MemOpQueue &MemOps) {
+ SmallVector<MachineBasicBlock::iterator, 4> Merges;
+ bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ int Offset = MemOps[SIndex].Offset;
+ int SOffset = Offset;
+ unsigned Pos = MemOps[SIndex].Position;
+ MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
+ unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
+ unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
+ bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill();
+
+ SmallVector<std::pair<unsigned,bool>, 8> Regs;
+ Regs.push_back(std::make_pair(PReg, isKill));
+ for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
+ int NewOffset = MemOps[i].Offset;
+ unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
+ unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
+ isKill = MemOps[i].MBBI->getOperand(0).isKill();
+ // AM4 - register numbers in ascending order.
+ // AM5 - consecutive register numbers in ascending order.
+ if (NewOffset == Offset + (int)Size &&
+ ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
+ Offset += Size;
+ Regs.push_back(std::make_pair(Reg, isKill));
+ PRegNum = RegNum;
+ } else {
+ // Can't merge this in. Try to merge the earlier ones first.
+ if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
+ Scratch, Regs, TII)) {
+ Merges.push_back(prior(Loc));
+ for (unsigned j = SIndex; j < i; ++j) {
+ MBB.erase(MemOps[j].MBBI);
+ MemOps[j].Merged = true;
+ }
+ }
+ SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
+ MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,MemOps);
+ Merges.append(Merges2.begin(), Merges2.end());
+ return Merges;
+ }
+
+ if (MemOps[i].Position > Pos) {
+ Pos = MemOps[i].Position;
+ Loc = MemOps[i].MBBI;
+ }
+ }
+
+ bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
+ if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
+ Scratch, Regs, TII)) {
+ Merges.push_back(prior(Loc));
+ for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
+ MBB.erase(MemOps[i].MBBI);
+ MemOps[i].Merged = true;
+ }
+ }
+
+ return Merges;
+}
+
+/// getInstrPredicate - If the instruction is predicated, returns its predicate
+/// condition; otherwise returns AL. It also returns the condition code
+/// register by reference.
+static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx == -1) {
+ PredReg = 0;
+ return ARMCC::AL;
+ }
+
+ PredReg = MI->getOperand(PIdx+1).getReg();
+ return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
+}
+
+static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, ARMCC::CondCodes Pred,
+ unsigned PredReg) {
+ unsigned MyPredReg = 0;
+ return (MI && MI->getOpcode() == ARM::SUBri &&
+ MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, ARMCC::CondCodes Pred,
+ unsigned PredReg) {
+ unsigned MyPredReg = 0;
+ return (MI && MI->getOpcode() == ARM::ADDri &&
+ MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg);
+}
+
+static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDR:
+ case ARM::STR:
+ case ARM::FLDS:
+ case ARM::FSTS:
+ return 4;
+ case ARM::FLDD:
+ case ARM::FSTD:
+ return 8;
+ case ARM::LDM:
+ case ARM::STM:
+ return (MI->getNumOperands() - 4) * 4;
+ case ARM::FLDMS:
+ case ARM::FSTMS:
+ case ARM::FLDMD:
+ case ARM::FSTMD:
+ return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
+ }
+}
+
+/// mergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of the base
+/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
+///
+/// stmia rn, <ra, rb, rc>
+/// rn := rn + 4 * 3;
+/// =>
+/// stmia rn!, <ra, rb, rc>
+///
+/// rn := rn - 4 * 3;
+/// ldmia rn, <ra, rb, rc>
+/// =>
+/// ldmdb rn!, <ra, rb, rc>
+static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(0).getReg();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ int Opcode = MI->getOpcode();
+ bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;
+
+ if (isAM4) {
+ if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
+ return false;
+
+ // Can't use the updating AM4 sub-mode if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
+ }
+
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
+ MBB.erase(PrevMBBI);
+ return true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
+ MBB.erase(PrevMBBI);
+ return true;
+ }
+ }
+
+ if (MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ return true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ return true;
+ }
+ }
+ } else {
+ // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
+ if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
+ return false;
+
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
+ unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
+ MBB.erase(PrevMBBI);
+ return true;
+ }
+ }
+
+ if (MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ }
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
+ switch (Opc) {
+ case ARM::LDR: return ARM::LDR_PRE;
+ case ARM::STR: return ARM::STR_PRE;
+ case ARM::FLDS: return ARM::FLDMS;
+ case ARM::FLDD: return ARM::FLDMD;
+ case ARM::FSTS: return ARM::FSTMS;
+ case ARM::FSTD: return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
+ switch (Opc) {
+ case ARM::LDR: return ARM::LDR_POST;
+ case ARM::STR: return ARM::STR_POST;
+ case ARM::FLDS: return ARM::FLDMS;
+ case ARM::FLDD: return ARM::FLDMD;
+ case ARM::FSTS: return ARM::FSTMS;
+ case ARM::FSTD: return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+/// mergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of the base
+/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
+static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII,
+ bool &Advance,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(1).getReg();
+ bool BaseKill = MI->getOperand(1).isKill();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ int Opcode = MI->getOpcode();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
+ (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
+ return false;
+
+ bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ // Can't do the merge if the destination register is the same as the would-be
+ // writeback register.
+ if (isLd && MI->getOperand(0).getReg() == Base)
+ return false;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+ bool DoMerge = false;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ unsigned NewOpc = 0;
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
+ } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes,
+ Pred, PredReg)) {
+ DoMerge = true;
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
+ }
+ if (DoMerge)
+ MBB.erase(PrevMBBI);
+ }
+
+ if (!DoMerge && MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
+ } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
+ DoMerge = true;
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
+ }
+ if (DoMerge) {
+ if (NextMBBI == I) {
+ Advance = true;
+ ++I;
+ }
+ MBB.erase(NextMBBI);
+ }
+ }
+
+ if (!DoMerge)
+ return false;
+
+ bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
+ unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
+ : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
+ true, isDPR ? 2 : 1);
+ if (isLd) {
+ if (isAM2)
+ // LDR_PRE, LDR_POST;
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, RegState::Define)
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ else
+ // FLDMS, FLDMD
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Offset).addImm(Pred).addReg(PredReg)
+ .addReg(MI->getOperand(0).getReg(), RegState::Define);
+ } else {
+ MachineOperand &MO = MI->getOperand(0);
+ if (isAM2)
+ // STR_PRE, STR_POST;
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ .addReg(MO.getReg(), getKillRegState(BaseKill))
+ .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ else
+ // FSTMS, FSTMD
+ BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
+ .addImm(Pred).addReg(PredReg)
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()));
+ }
+ MBB.erase(MBBI);
+
+ return true;
+}
+
+/// isMemoryOp - Returns true if the instruction is a memory operation that this
+/// pass is capable of operating on.
+static bool isMemoryOp(MachineInstr *MI) {
+ int Opcode = MI->getOpcode();
+ switch (Opcode) {
+ default: break;
+ case ARM::LDR:
+ case ARM::STR:
+ return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
+ case ARM::FLDS:
+ case ARM::FSTS:
+ return MI->getOperand(1).isReg();
+ case ARM::FLDD:
+ case ARM::FSTD:
+ return MI->getOperand(1).isReg();
+ }
+ return false;
+}
+
+/// AdvanceRS - Advance register scavenger to just before the earliest memory
+/// op that is being merged.
+void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
+ MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
+ unsigned Position = MemOps[0].Position;
+ for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
+ if (MemOps[i].Position < Position) {
+ Position = MemOps[i].Position;
+ Loc = MemOps[i].MBBI;
+ }
+ }
+
+ if (Loc != MBB.begin())
+ RS->forward(prior(Loc));
+}
+
+/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
+/// ops of the same base and incrementing offset into LDM / STM ops.
+bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
+ unsigned NumMerges = 0;
+ unsigned NumMemOps = 0;
+ MemOpQueue MemOps;
+ unsigned CurrBase = 0;
+ int CurrOpc = -1;
+ unsigned CurrSize = 0;
+ ARMCC::CondCodes CurrPred = ARMCC::AL;
+ unsigned CurrPredReg = 0;
+ unsigned Position = 0;
+
+ RS->enterBasicBlock(&MBB);
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ bool Advance = false;
+ bool TryMerge = false;
+ bool Clobber = false;
+
+ bool isMemOp = isMemoryOp(MBBI);
+ if (isMemOp) {
+ int Opcode = MBBI->getOpcode();
+ bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ unsigned Size = getLSMultipleTransferSize(MBBI);
+ unsigned Base = MBBI->getOperand(1).getReg();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
+ unsigned NumOperands = MBBI->getDesc().getNumOperands();
+ unsigned OffField = MBBI->getOperand(NumOperands-3).getImm();
+ int Offset = isAM2
+ ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
+ if (isAM2) {
+ if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ } else {
+ if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ }
+ // Watch out for:
+ // r4 := ldr [r5]
+ // r5 := ldr [r5, #4]
+ // r6 := ldr [r5, #8]
+ //
+ // The second ldr has effectively broken the chain even though it
+ // looks like the later ldr(s) use the same base register. Try to
+ // merge the ldr's so far, including this one. But don't try to
+ // combine the following ldr(s).
+ Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
+ if (CurrBase == 0 && !Clobber) {
+ // Start of a new chain.
+ CurrBase = Base;
+ CurrOpc = Opcode;
+ CurrSize = Size;
+ CurrPred = Pred;
+ CurrPredReg = PredReg;
+ MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+ NumMemOps++;
+ Advance = true;
+ } else {
+ if (Clobber) {
+ TryMerge = true;
+ Advance = true;
+ }
+
+ if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
+ // No need to match PredReg.
+ // Continue adding to the queue.
+ if (Offset > MemOps.back().Offset) {
+ MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+ NumMemOps++;
+ Advance = true;
+ } else {
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
+ I != E; ++I) {
+ if (Offset < I->Offset) {
+ MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
+ NumMemOps++;
+ Advance = true;
+ break;
+ } else if (Offset == I->Offset) {
+ // Collision! This can't be merged!
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (Advance) {
+ ++Position;
+ ++MBBI;
+ } else
+ TryMerge = true;
+
+ if (TryMerge) {
+ if (NumMemOps > 1) {
+ // Try to find a free register to use as a new base in case it's needed.
+ // First advance to the instruction just before the start of the chain.
+ AdvanceRS(MBB, MemOps);
+ // Find a scratch register. Make sure it's a call clobbered register or
+ // a spilled callee-saved register.
+ unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
+ if (!Scratch)
+ Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
+ AFI->getSpilledCSRegisters());
+ // Process the load / store instructions.
+ RS->forward(prior(MBBI));
+
+ // Merge ops.
+ SmallVector<MachineBasicBlock::iterator,4> MBBII =
+ MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
+ CurrPred, CurrPredReg, Scratch, MemOps);
+
+ // Try folding preceding/trailing base inc/dec into the generated
+ // LDM/STM ops.
+ for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
+ if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI))
+ NumMerges++;
+ NumMerges += MBBII.size();
+
+ // Try folding preceding/trailing base inc/dec into those loads/stores
+ // that were not merged to form LDM/STM ops.
+ for (unsigned i = 0; i != NumMemOps; ++i)
+ if (!MemOps[i].Merged)
+ if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
+ NumMerges++;
+
+ // RS may be pointing to an instruction that's deleted.
+ RS->skipTo(prior(MBBI));
+ }
+
+ CurrBase = 0;
+ CurrOpc = -1;
+ CurrSize = 0;
+ CurrPred = ARMCC::AL;
+ CurrPredReg = 0;
+ if (NumMemOps) {
+ MemOps.clear();
+ NumMemOps = 0;
+ }
+
+ // If iterator hasn't been advanced and this is not a memory op, skip it.
+ // It can't start a new chain anyway.
+ if (!Advance && !isMemOp && MBBI != E) {
+ ++Position;
+ ++MBBI;
+ }
+ }
+ }
+ return NumMerges > 0;
+}
+
+/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
+/// (bx lr) into the preceding stack restore so it directly restores the value
+/// of LR into pc.
+/// ldmfd sp!, {r7, lr}
+/// bx lr
+/// =>
+/// ldmfd sp!, {r7, pc}
+bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ if (MBB.empty()) return false;
+
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
+ MachineInstr *PrevMI = prior(MBBI);
+ if (PrevMI->getOpcode() == ARM::LDM) {
+ MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+ if (MO.getReg() == ARM::LR) {
+ PrevMI->setDesc(TII->get(ARM::LDM_RET));
+ MO.setReg(ARM::PC);
+ MBB.erase(MBBI);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetMachine &TM = Fn.getTarget();
+ AFI = Fn.getInfo<ARMFunctionInfo>();
+ TII = TM.getInstrInfo();
+ TRI = TM.getRegisterInfo();
+ RS = new RegScavenger();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= LoadStoreMultipleOpti(MBB);
+ Modified |= MergeReturnIntoLDM(MBB);
+ }
+
+ delete RS;
+ return Modified;
+}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
new file mode 100644
index 000000000000..6662be12a578
--- /dev/null
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -0,0 +1,238 @@
+//===- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ARM-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMACHINEFUNCTIONINFO_H
+#define ARMMACHINEFUNCTIONINFO_H
+
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM target-specific information for each MachineFunction.
+class ARMFunctionInfo : public MachineFunctionInfo {
+
+ /// isThumb - True if this function is compiled under Thumb mode.
+ /// Used to initialize Align, so it must precede it.
+ bool isThumb;
+
+ /// Align - required alignment. ARM functions and Thumb functions with
+ /// constant pools require 4-byte alignment; other Thumb functions
+ /// require only 2-byte alignment.
+ unsigned Align;
+
+ /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
+ ///
+ unsigned VarArgsRegSaveSize;
+
+ /// HasStackFrame - True if this function has a stack frame. Set by
+ /// processFunctionBeforeCalleeSavedScan().
+ bool HasStackFrame;
+
+ /// LRSpilledForFarJump - True if the LR register has been spilled to
+ /// enable a far jump.
+ bool LRSpilledForFarJump;
+
+ /// R3IsLiveIn - True if R3 is live in to this function.
+ /// FIXME: Remove when register scavenger for Thumb is done.
+ bool R3IsLiveIn;
+
+ /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
+ /// spill stack offset.
+ unsigned FramePtrSpillOffset;
+
+ /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
+ /// register spill areas. For Mac OS X:
+ ///
+ /// GPR callee-saved (1) : r4, r5, r6, r7, lr
+ /// --------------------------------------------
+ /// GPR callee-saved (2) : r8, r10, r11
+ /// --------------------------------------------
+ /// DPR callee-saved : d8 - d15
+ unsigned GPRCS1Offset;
+ unsigned GPRCS2Offset;
+ unsigned DPRCSOffset;
+
+ /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spill
+ /// areas.
+ unsigned GPRCS1Size;
+ unsigned GPRCS2Size;
+ unsigned DPRCSSize;
+
+ /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
+ /// which belong to these spill areas.
+ BitVector GPRCS1Frames;
+ BitVector GPRCS2Frames;
+ BitVector DPRCSFrames;
+
+ /// SpilledCSRegs - A BitVector mask of all spilled callee-saved registers.
+ ///
+ BitVector SpilledCSRegs;
+
+ /// JumpTableUId - Unique id for jumptables.
+ ///
+ unsigned JumpTableUId;
+
+ unsigned ConstPoolEntryUId;
+
+public:
+ ARMFunctionInfo() :
+ isThumb(false),
+ Align(2U),
+ VarArgsRegSaveSize(0), HasStackFrame(false),
+ LRSpilledForFarJump(false), R3IsLiveIn(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
+ JumpTableUId(0), ConstPoolEntryUId(0) {}
+
+ ARMFunctionInfo(MachineFunction &MF) :
+ isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+ Align(isThumb ? 1U : 2U),
+ VarArgsRegSaveSize(0), HasStackFrame(false),
+ LRSpilledForFarJump(false), R3IsLiveIn(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
+ GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
+ SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()),
+ JumpTableUId(0), ConstPoolEntryUId(0) {}
+
+ bool isThumbFunction() const { return isThumb; }
+
+ unsigned getAlign() const { return Align; }
+ void setAlign(unsigned a) { Align = a; }
+
+ unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
+ void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+
+ bool hasStackFrame() const { return HasStackFrame; }
+ void setHasStackFrame(bool s) { HasStackFrame = s; }
+
+ bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
+ void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
+
+ // FIXME: Remove when register scavenger for Thumb is done.
+ bool isR3LiveIn() const { return R3IsLiveIn; }
+ void setR3IsLiveIn(bool l) { R3IsLiveIn = l; }
+
+ unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
+ void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
+ unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
+ unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
+
+ void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
+ void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
+ void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
+ unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+ unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
+
+ void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
+ void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+ void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
+
+ bool isGPRCalleeSavedArea1Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS1Frames.size())
+ return false;
+ return GPRCS1Frames[fi];
+ }
+ bool isGPRCalleeSavedArea2Frame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCS2Frames.size())
+ return false;
+ return GPRCS2Frames[fi];
+ }
+ bool isDPRCalleeSavedAreaFrame(int fi) const {
+ if (fi < 0 || fi >= (int)DPRCSFrames.size())
+ return false;
+ return DPRCSFrames[fi];
+ }
+
+ void addGPRCalleeSavedArea1Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS1Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS1Frames.resize(Size);
+ }
+ GPRCS1Frames[fi] = true;
+ }
+ }
+ void addGPRCalleeSavedArea2Frame(int fi) {
+ if (fi >= 0) {
+ int Size = GPRCS2Frames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ GPRCS2Frames.resize(Size);
+ }
+ GPRCS2Frames[fi] = true;
+ }
+ }
+ void addDPRCalleeSavedAreaFrame(int fi) {
+ if (fi >= 0) {
+ int Size = DPRCSFrames.size();
+ if (fi >= Size) {
+ Size *= 2;
+ if (fi >= Size)
+ Size = fi+1;
+ DPRCSFrames.resize(Size);
+ }
+ DPRCSFrames[fi] = true;
+ }
+ }
+
+ void setCSRegisterIsSpilled(unsigned Reg) {
+ SpilledCSRegs.set(Reg);
+ }
+
+ bool isCSRegisterSpilled(unsigned Reg) const {
+ return SpilledCSRegs[Reg];
+ }
+
+ const BitVector &getSpilledCSRegisters() const {
+ return SpilledCSRegs;
+ }
+
+ unsigned createJumpTableUId() {
+ return JumpTableUId++;
+ }
+
+ unsigned getNumJumpTables() const {
+ return JumpTableUId;
+ }
+
+ void initConstPoolEntryUId(unsigned UId) {
+ ConstPoolEntryUId = UId;
+ }
+
+ unsigned getNumConstPoolEntries() const {
+ return ConstPoolEntryUId;
+ }
+
+ unsigned createConstPoolEntryUId() {
+ return ConstPoolEntryUId++;
+ }
+};
+} // End llvm namespace
+
+#endif // ARMMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
new file mode 100644
index 000000000000..199858f525c8
--- /dev/null
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -0,0 +1,1528 @@
+//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<bool> ThumbRegScavenging("enable-thumb-reg-scavenging",
+ cl::Hidden,
+ cl::desc("Enable register scavenging on Thumb"));
+
+unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace ARM;
+ switch (RegEnum) {
+ case R0: case S0: case D0: return 0;
+ case R1: case S1: case D1: return 1;
+ case R2: case S2: case D2: return 2;
+ case R3: case S3: case D3: return 3;
+ case R4: case S4: case D4: return 4;
+ case R5: case S5: case D5: return 5;
+ case R6: case S6: case D6: return 6;
+ case R7: case S7: case D7: return 7;
+ case R8: case S8: case D8: return 8;
+ case R9: case S9: case D9: return 9;
+ case R10: case S10: case D10: return 10;
+ case R11: case S11: case D11: return 11;
+ case R12: case S12: case D12: return 12;
+ case SP: case S13: case D13: return 13;
+ case LR: case S14: case D14: return 14;
+ case PC: case S15: case D15: return 15;
+ case S16: return 16;
+ case S17: return 17;
+ case S18: return 18;
+ case S19: return 19;
+ case S20: return 20;
+ case S21: return 21;
+ case S22: return 22;
+ case S23: return 23;
+ case S24: return 24;
+ case S25: return 25;
+ case S26: return 26;
+ case S27: return 27;
+ case S28: return 28;
+ case S29: return 29;
+ case S30: return 30;
+ case S31: return 31;
+ default:
+ assert(0 && "Unknown ARM register!");
+ abort();
+ }
+}
+
+unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum,
+ bool &isSPVFP) {
+ isSPVFP = false;
+
+ using namespace ARM;
+ switch (RegEnum) {
+ default:
+ assert(0 && "Unknown ARM register!");
+ abort();
+ case R0: case D0: return 0;
+ case R1: case D1: return 1;
+ case R2: case D2: return 2;
+ case R3: case D3: return 3;
+ case R4: case D4: return 4;
+ case R5: case D5: return 5;
+ case R6: case D6: return 6;
+ case R7: case D7: return 7;
+ case R8: case D8: return 8;
+ case R9: case D9: return 9;
+ case R10: case D10: return 10;
+ case R11: case D11: return 11;
+ case R12: case D12: return 12;
+ case SP: case D13: return 13;
+ case LR: case D14: return 14;
+ case PC: case D15: return 15;
+
+ case S0: case S1: case S2: case S3:
+ case S4: case S5: case S6: case S7:
+ case S8: case S9: case S10: case S11:
+ case S12: case S13: case S14: case S15:
+ case S16: case S17: case S18: case S19:
+ case S20: case S21: case S22: case S23:
+ case S24: case S25: case S26: case S27:
+ case S28: case S29: case S30: case S31: {
+ isSPVFP = true;
+ switch (RegEnum) {
+ default: return 0; // Avoid compile time warning.
+ case S0: return 0;
+ case S1: return 1;
+ case S2: return 2;
+ case S3: return 3;
+ case S4: return 4;
+ case S5: return 5;
+ case S6: return 6;
+ case S7: return 7;
+ case S8: return 8;
+ case S9: return 9;
+ case S10: return 10;
+ case S11: return 11;
+ case S12: return 12;
+ case S13: return 13;
+ case S14: return 14;
+ case S15: return 15;
+ case S16: return 16;
+ case S17: return 17;
+ case S18: return 18;
+ case S19: return 19;
+ case S20: return 20;
+ case S21: return 21;
+ case S22: return 22;
+ case S23: return 23;
+ case S24: return 24;
+ case S25: return 25;
+ case S26: return 26;
+ case S27: return 27;
+ case S28: return 28;
+ case S29: return 29;
+ case S30: return 30;
+ case S31: return 31;
+ }
+ }
+ }
+}
+
+ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii,
+ const ARMSubtarget &sti)
+ : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
+ TII(tii), STI(sti),
+ FramePtr((STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
+ return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
+}
+
+static inline
+const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
+ return MIB.addReg(0);
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Val,
+ unsigned Pred, unsigned PredReg,
+ const TargetInstrInfo *TII,
+ bool isThumb,
+ DebugLoc dl) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ Constant *C = ConstantInt::get(Type::Int32Ty, Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+ if (isThumb)
+ BuildMI(MBB, MBBI, dl,
+ TII->get(ARM::tLDRcp),DestReg).addConstantPoolIndex(Idx);
+ else
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::LDRcp), DestReg)
+ .addConstantPoolIndex(Idx)
+ .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+}
+
+const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const {
+ return &ARM::GPRRegClass;
+}
+
+/// isLowRegister - Returns true if the register is low register r0-r7.
+///
+bool ARMRegisterInfo::isLowRegister(unsigned Reg) const {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ return true;
+ default:
+ return false;
+ }
+}
+
+const TargetRegisterClass*
+ARMRegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const {
+ if (STI.isThumb()) {
+ if (isLowRegister(Reg))
+ return ARM::tGPRRegisterClass;
+ switch (Reg) {
+ default:
+ break;
+ case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
+ case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC:
+ return ARM::GPRRegisterClass;
+ }
+ }
+ return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT);
+}
+
+const unsigned*
+ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = {
+ ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+ ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+
+ static const unsigned DarwinCalleeSavedRegs[] = {
+ ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+ ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+ return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const *
+ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ 0
+ };
+ static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = {
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass,
+ &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass,
+
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ 0
+ };
+ return STI.isThumb() ? ThumbCalleeSavedRegClasses : CalleeSavedRegClasses;
+}
+
+BitVector ARMRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ // FIXME: avoid re-calculating this every time.
+ BitVector Reserved(getNumRegs());
+ Reserved.set(ARM::SP);
+ Reserved.set(ARM::PC);
+ if (STI.isTargetDarwin() || hasFP(MF))
+ Reserved.set(FramePtr);
+ // Some targets reserve R9.
+ if (STI.isR9Reserved())
+ Reserved.set(ARM::R9);
+ return Reserved;
+}
+
+bool
+ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const {
+ switch (Reg) {
+ default: break;
+ case ARM::SP:
+ case ARM::PC:
+ return true;
+ case ARM::R7:
+ case ARM::R11:
+ if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
+ return true;
+ break;
+ case ARM::R9:
+ return STI.isR9Reserved();
+ }
+
+ return false;
+}
+
+bool
+ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ return ThumbRegScavenging || !AFI->isThumbFunction();
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+///
+bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+// not required, we reserve argument space for call sites in the function
+// immediately on entry to the current function. This eliminates the need for
+// add/sub sp brackets around call sites. Returns true if the call frame is
+// included as part of the stack frame.
+bool ARMRegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ unsigned CFSize = FFI->getMaxCallFrameSize();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ // It's not always a good idea to include the call frame as part of the
+ // stack frame. ARM (especially Thumb) has only small immediate offsets for
+ // addressing the stack frame, so a large call frame can cause poor codegen
+ // and may even make it impossible to scavenge a register.
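+ // (For reference, the thresholds below work out to 510 bytes for Thumb and
+ // 2047 bytes for ARM.)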
+ if (AFI->isThumbFunction()) {
+ if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+ return false;
+ } else {
+ if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
+ return false;
+ }
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// emitARMRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in ARM code.
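+/// The immediate is split into so_imm-sized chunks (an 8-bit value rotated by
+/// an even amount); e.g. 0x404 is not encodable as a single so_imm, so it is
+/// materialized with two ADDri / SUBri instructions covering 0x400 and 0x4.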
+static
+void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg, int NumBytes,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const TargetInstrInfo &TII,
+ DebugLoc dl) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ while (NumBytes) {
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
+ unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
+ assert(ThisVal && "Didn't extract field correctly");
+
+ // We will handle these bits from offset, clear them.
+ NumBytes &= ~ThisVal;
+
+ // Get the properly encoded SOImmVal field.
+ int SOImmVal = ARM_AM::getSOImmVal(ThisVal);
+ assert(SOImmVal != -1 && "Bit extraction didn't work?");
+
+ // Build the new ADD / SUB.
+ BuildMI(MBB, MBBI, dl, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg)
+ .addReg(BaseReg, RegState::Kill).addImm(SOImmVal)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ BaseReg = DestReg;
+ }
+}
+
+/// calcNumMI - Returns the number of instructions required to materialize
+/// the specific add / sub r, c instruction.
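+/// For example, with Opc == ARM::tADDrSPi, NumBits == 8, Scale == 4 and
+/// Bytes == 1024, one tADDrSPi covers 1020 bytes and a trailing tADDi8 covers
+/// the remaining 4 bytes, giving a count of 2.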
+static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
+ unsigned NumBits, unsigned Scale) {
+ unsigned NumMIs = 0;
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+
+ if (Opc == ARM::tADDrSPi) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ NumMIs++;
+ NumBits = 8;
+ Scale = 1; // Followed by a number of tADDi8.
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ }
+
+ NumMIs += Bytes / Chunk;
+ if ((Bytes % Chunk) != 0)
+ NumMIs++;
+ if (ExtraOpc)
+ NumMIs++;
+ return NumMIs;
+}
+
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
+/// in a register using mov / mvn sequences or load the immediate from a
+/// constpool entry.
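+/// Note: when DestReg is SP, R3 is borrowed as the scratch register; its old
+/// value is parked in R12 and restored afterwards.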
+static
+void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, bool CanChangeCC,
+ const TargetInstrInfo &TII,
+ const ARMRegisterInfo& MRI,
+ DebugLoc dl) {
+ bool isHigh = !MRI.isLowRegister(DestReg) ||
+ (BaseReg != 0 && !MRI.isLowRegister(BaseReg));
+ bool isSub = false;
+ // Subtract doesn't have a high register version. Load the negative value
+ // if either the base or destination register is a high register. Also, do
+ // not issue sub as part of the sequence if the condition register needs to
+ // be preserved.
+ if (NumBytes < 0 && !isHigh && CanChangeCC) {
+ isSub = true;
+ NumBytes = -NumBytes;
+ }
+ unsigned LdReg = DestReg;
+ if (DestReg == ARM::SP) {
+ assert(BaseReg == ARM::SP && "Unexpected!");
+ LdReg = ARM::R3;
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+ .addReg(ARM::R3, RegState::Kill);
+ }
+
+ if (NumBytes <= 255 && NumBytes >= 0)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
+ else if (NumBytes < 0 && NumBytes >= -255) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg)
+ .addReg(LdReg, RegState::Kill);
+ } else
+ MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, ARMCC::AL, 0, &TII,
+ true, dl);
+
+ // Emit add / sub.
+ int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
+ const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl,
+ TII.get(Opc), DestReg);
+ if (DestReg == ARM::SP || isSub)
+ MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
+ else
+ MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
+ if (DestReg == ARM::SP)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
+ .addReg(ARM::R12, RegState::Kill);
+}
+
+/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code.
+static
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII,
+ const ARMRegisterInfo& MRI,
+ DebugLoc dl) {
+ bool isSub = NumBytes < 0;
+ unsigned Bytes = (unsigned)NumBytes;
+ if (isSub) Bytes = -NumBytes;
+ bool isMul4 = (Bytes & 3) == 0;
+ bool isTwoAddr = false;
+ bool DstNotEqBase = false;
+ unsigned NumBits = 1;
+ unsigned Scale = 1;
+ int Opc = 0;
+ int ExtraOpc = 0;
+
+ if (DestReg == BaseReg && BaseReg == ARM::SP) {
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ NumBits = 7;
+ Scale = 4;
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ isTwoAddr = true;
+ } else if (!isSub && BaseReg == ARM::SP) {
+ // r1 = add sp, 403
+ // =>
+ // r1 = add sp, 100 * 4
+ // r1 = add r1, 3
+ if (!isMul4) {
+ Bytes &= ~3;
+ ExtraOpc = ARM::tADDi3;
+ }
+ NumBits = 8;
+ Scale = 4;
+ Opc = ARM::tADDrSPi;
+ } else {
+ // sp = sub sp, c
+ // r1 = sub sp, c
+ // r8 = sub sp, c
+ if (DestReg != BaseReg)
+ DstNotEqBase = true;
+ NumBits = 8;
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ isTwoAddr = true;
+ }
+
+ unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
+ unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
+ if (NumMIs > Threshold) {
+ // This will expand into too many instructions. Load the immediate from a
+ // constpool entry.
+ emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII,
+ MRI, dl);
+ return;
+ }
+
+ if (DstNotEqBase) {
+ if (MRI.isLowRegister(DestReg) && MRI.isLowRegister(BaseReg)) {
+ // If both are low registers, emit DestReg = add BaseReg, min(Imm, 7)
+ unsigned Chunk = (1 << 3) - 1;
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg)
+ .addReg(BaseReg, RegState::Kill).addImm(ThisVal);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill);
+ }
+ BaseReg = DestReg;
+ }
+
+ unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+ while (Bytes) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ ThisVal /= Scale;
+ // Build the new tADD / tSUB.
+ if (isTwoAddr)
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(DestReg).addImm(ThisVal);
+ else {
+ bool isKill = BaseReg != ARM::SP;
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
+ BaseReg = DestReg;
+
+ if (Opc == ARM::tADDrSPi) {
+ // r4 = add sp, imm
+ // r4 = add r4, imm
+ // ...
+ NumBits = 8;
+ Scale = 1;
+ Chunk = ((1 << NumBits) - 1) * Scale;
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ isTwoAddr = true;
+ }
+ }
+ }
+
+ if (ExtraOpc)
+ BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm(((unsigned)NumBytes) & 3);
+}
+
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg,
+ bool isThumb, const TargetInstrInfo &TII,
+ const ARMRegisterInfo& MRI,
+ DebugLoc dl) {
+ if (isThumb)
+ emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI, dl);
+ else
+ emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes,
+ Pred, PredReg, TII, dl);
+}
+
+void ARMRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
+ MachineInstr *Old = I;
+ DebugLoc dl = Old->getDebugLoc();
+ unsigned Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ // Replace the pseudo instruction with a new instruction...
+ unsigned Opc = Old->getOpcode();
+ bool isThumb = AFI->isThumbFunction();
+ ARMCC::CondCodes Pred = isThumb
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(1).getImm();
+ if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+ // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+ unsigned PredReg = isThumb ? 0 : Old->getOperand(2).getReg();
+ emitSPUpdate(MBB, I, -Amount, Pred, PredReg, isThumb, TII, *this, dl);
+ } else {
+ // Note: PredReg is operand 3 for ADJCALLSTACKUP.
+ unsigned PredReg = isThumb ? 0 : Old->getOperand(3).getReg();
+ assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+ emitSPUpdate(MBB, I, Amount, Pred, PredReg, isThumb, TII, *this, dl);
+ }
+ }
+ }
+ MBB.erase(I);
+}
+
+/// emitThumbConstant - Emit a series of instructions to materialize a
+/// constant.
+static void emitThumbConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Imm,
+ const TargetInstrInfo &TII,
+ const ARMRegisterInfo& MRI,
+ DebugLoc dl) {
+ bool isSub = Imm < 0;
+ if (isSub) Imm = -Imm;
+
+ int Chunk = (1 << 8) - 1;
+ int ThisVal = (Imm > Chunk) ? Chunk : Imm;
+ Imm -= ThisVal;
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal);
+ if (Imm > 0)
+ emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl);
+ if (isSub)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg)
+ .addReg(DestReg, RegState::Kill);
+}
+
+/// findScratchRegister - Find a 'free' ARM register. If register scavenger
+/// is not being used, R12 is available. Otherwise, try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static
+unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC,
+ ARMFunctionInfo *AFI) {
+ unsigned Reg = RS ? RS->FindUnusedReg(RC, true) : (unsigned) ARM::R12;
+ assert (!AFI->isThumbFunction());
+ if (Reg == 0)
+ // Try an already spilled CS register.
+ Reg = RS->FindUnusedReg(RC, AFI->getSpilledCSRegisters());
+
+ return Reg;
+}
+
+void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const{
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isThumb = AFI->isThumbFunction();
+ DebugLoc dl = MI.getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize() + SPAdj;
+
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex))
+ Offset -= AFI->getDPRCalleeSavedAreaOffset();
+ else if (hasFP(MF)) {
+ assert(SPAdj == 0 && "Unexpected");
+ // There are alloca()'s in this function; we must reference off the frame
+ // pointer instead.
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ }
+
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = MI.getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ // Memory operands in inline assembly always use AddrMode2.
+ if (Opcode == ARM::INLINEASM)
+ AddrMode = ARMII::AddrMode2;
+
+ if (Opcode == ARM::ADDri) {
+ Offset += MI.getOperand(i+1).getImm();
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::MOVr));
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(i+1);
+ return;
+ } else if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setDesc(TII.get(ARM::SUBri));
+ }
+
+ // Common case: small offset, fits into instruction.
+ int ImmedOffset = ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset != -1) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(ImmedOffset);
+ return;
+ }
+
+ // Otherwise, we fall back to the common code below to form the imm offset
+ // with a sequence of ADDri instructions. First, though, pull as much of the
+ // imm into this ADDri as possible.
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+ unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ // Get the properly encoded SOImmVal field.
+ int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal);
+ assert(ThisSOImmVal != -1 && "Bit extraction didn't work?");
+ MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal);
+ } else if (Opcode == ARM::tADDrSPi) {
+ Offset += MI.getOperand(i+1).getImm();
+
+ // Can't use tADDrSPi if it's based off the frame pointer.
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ if (FrameReg != ARM::SP) {
+ Opcode = ARM::tADDi3;
+ MI.setDesc(TII.get(ARM::tADDi3));
+ NumBits = 3;
+ } else {
+ NumBits = 8;
+ Scale = 4;
+ assert((Offset & 3) == 0 &&
+ "Thumb add/sub sp, #imm immediate must be multiple of 4!");
+ }
+
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setDesc(TII.get(ARM::tMOVhir2lor));
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(i+1);
+ return;
+ }
+
+ // Common case: small offset, fits into instruction.
+ unsigned Mask = (1 << NumBits) - 1;
+ if (((Offset / Scale) & ~Mask) == 0) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
+ return;
+ }
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Bytes = (Offset > 0) ? Offset : -Offset;
+ unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
+ // MI would expand into a large number of instructions. Don't try to
+ // simplify the immediate.
+ if (NumMIs > 2) {
+ emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
+ *this, dl);
+ MBB.erase(II);
+ return;
+ }
+
+ if (Offset > 0) {
+ // Translate r0 = add sp, imm to
+ // r0 = add sp, 255*4
+ // r0 = add r0, (imm - 255*4)
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Mask);
+ Offset = (Offset - Mask * Scale);
+ MachineBasicBlock::iterator NII = next(II);
+ emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
+ *this, dl);
+ } else {
+ // Translate r0 = add sp, -imm to
+ // r0 = -imm (this is then translated into a series of instructions)
+ // r0 = add r0, sp
+ emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
+ MI.setDesc(TII.get(ARM::tADDhirr));
+ MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+ }
+ return;
+ } else {
+ unsigned ImmIdx = 0;
+ int InstrOffs = 0;
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ switch (AddrMode) {
+ case ARMII::AddrMode2: {
+ ImmIdx = i+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = i+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ break;
+ }
+ case ARMII::AddrMode5: {
+ ImmIdx = i+1;
+ InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrModeTs: {
+ ImmIdx = i+1;
+ InstrOffs = MI.getOperand(ImmIdx).getImm();
+ NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+ Scale = 4;
+ break;
+ }
+ default:
+ assert(0 && "Unsupported addressing mode!");
+ abort();
+ break;
+ }
+
+ Offset += InstrOffs * Scale;
+ assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+ if (Offset < 0 && !isThumb) {
+ Offset = -Offset;
+ isSub = true;
+ }
+
+ // Common case: small offset, fits into instruction.
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with sp
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
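+ // For these addressing modes the add/sub flag is encoded in the bit just
+ // above the immediate field, hence the (1 << NumBits) below.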
+ if (isSub)
+ ImmedOffset |= 1 << NumBits;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ return;
+ }
+
+ bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
+ if (AddrMode == ARMII::AddrModeTs) {
+ // Thumb tLDRspi, tSTRspi. These will change to instructions that use
+ // a different base register.
+ NumBits = 5;
+ Mask = (1 << NumBits) - 1;
+ }
+ // If this is a thumb spill / restore, we will be using a constpool load to
+ // materialize the offset.
+ if (AddrMode == ARMII::AddrModeTs && isThumSpillRestore)
+ ImmOp.ChangeToImmediate(0);
+ else {
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub)
+ ImmedOffset |= 1 << NumBits;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+ }
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above; handle the rest by providing a register that is
+ // SP+LargeImm.
+ assert(Offset && "This code isn't needed if offset already handled!");
+
+ if (isThumb) {
+ if (Desc.mayLoad()) {
+ // Use the destination register to materialize sp + offset.
+ unsigned TmpReg = MI.getOperand(0).getReg();
+ bool UseRR = false;
+ if (Opcode == ARM::tRestore) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+ Offset, false, TII, *this, dl);
+ else {
+ emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII,
+ true, dl);
+ UseRR = true;
+ }
+ } else
+ emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+ *this, dl);
+ MI.setDesc(TII.get(ARM::tLDR));
+ MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+ if (UseRR)
+ // Use [reg, reg] addrmode.
+ MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+ else // tLDR has an extra register operand.
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+ } else if (Desc.mayStore()) {
+ // FIXME! This is horrific!!! We need register scavenging.
+ // Our temporary workaround has marked r3 unavailable. Of course, r3 is
+ // also an ABI register, so it's possible that it is the register being
+ // stored here. If that's the case, we do the following:
+ // r12 = r2
+ // Use r2 to materialize sp + offset
+ // str r3, r2
+ // r2 = r12
+ unsigned ValReg = MI.getOperand(0).getReg();
+ unsigned TmpReg = ARM::R3;
+ bool UseRR = false;
+ if (ValReg == ARM::R3) {
+ BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+ .addReg(ARM::R2, RegState::Kill);
+ TmpReg = ARM::R2;
+ }
+ if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+ BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+ .addReg(ARM::R3, RegState::Kill);
+ if (Opcode == ARM::tSpill) {
+ if (FrameReg == ARM::SP)
+ emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+ Offset, false, TII, *this, dl);
+ else {
+ emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII,
+ true, dl);
+ UseRR = true;
+ }
+ } else
+ emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+ *this, dl);
+ MI.setDesc(TII.get(ARM::tSTR));
+ MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+ if (UseRR) // Use [reg, reg] addrmode.
+ MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+ else // tSTR has an extra register operand.
+ MI.addOperand(MachineOperand::CreateReg(0, false));
+
+ MachineBasicBlock::iterator NII = next(II);
+ if (ValReg == ARM::R3)
+ BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2)
+ .addReg(ARM::R12, RegState::Kill);
+ if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+ BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
+ .addReg(ARM::R12, RegState::Kill);
+ } else
+ assert(false && "Unexpected opcode!");
+ } else {
+ // Insert a set of r12 with the full address: r12 = sp + offset
+ // If the offset we have is too large to fit into the instruction, we need
+ // to form it with a series of ADDri's. Do this by taking 8-bit chunks
+ // out of 'Offset'.
+ unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI);
+ if (ScratchReg == 0)
+ // No register is "free". Scavenge a register.
+ ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj);
+ int PIdx = MI.findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+ unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+ emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg,
+ isSub ? -Offset : Offset, Pred, PredReg, TII, dl);
+ MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+ }
+}
+
+static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ int Offset = 0;
+ for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -FFI->getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (FFI->isDeadObjectIndex(i))
+ continue;
+ Offset += FFI->getObjectSize(i);
+ unsigned Align = FFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
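+ // (e.g. an Offset of 10 with Align 8 rounds up to 16)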
+ Offset = (Offset+Align-1)/Align*Align;
+ }
+ return (unsigned)Offset;
+}
+
+void
+ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // This tells PEI to spill the FP as if it is any other callee-save register
+ // to take advantage of the eliminateFrameIndex machinery. This also ensures it
+ // is spilled in the order specified by getCalleeSavedRegs() to make it easier
+ // to combine multiple loads / stores.
+ bool CanEliminateFrame = true;
+ bool CS1Spilled = false;
+ bool LRSpilled = false;
+ unsigned NumGPRSpills = 0;
+ SmallVector<unsigned, 4> UnspilledCS1GPRs;
+ SmallVector<unsigned, 4> UnspilledCS2GPRs;
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Don't spill FP if the frame can be eliminated. This is determined
+ // by scanning the callee-save registers to see if any is used.
+ const unsigned *CSRegs = getCalleeSavedRegs();
+ const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ bool Spilled = false;
+ if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+ AFI->setCSRegisterIsSpilled(Reg);
+ Spilled = true;
+ CanEliminateFrame = false;
+ } else {
+ // Check alias registers too.
+ for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) {
+ if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
+ Spilled = true;
+ CanEliminateFrame = false;
+ }
+ }
+ }
+
+ if (CSRegClasses[i] == &ARM::GPRRegClass) {
+ if (Spilled) {
+ NumGPRSpills++;
+
+ if (!STI.isTargetDarwin()) {
+ if (Reg == ARM::LR)
+ LRSpilled = true;
+ CS1Spilled = true;
+ continue;
+ }
+
+ // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetDarwin()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
+ }
+ }
+ }
+ }
+
+ bool ForceLRSpill = false;
+ if (!LRSpilled && AFI->isThumbFunction()) {
+ unsigned FnSize = TII.GetFunctionSizeInBytes(MF);
+ // Force LR to be spilled if the Thumb function size is > 2048. This enables
+ // the use of BL to implement a far jump. If it turns out that it's not
+ // needed, then the branch fix-up path will undo it.
+ if (FnSize >= (1 << 11)) {
+ CanEliminateFrame = false;
+ ForceLRSpill = true;
+ }
+ }
+
+ bool ExtraCSSpill = false;
+ if (!CanEliminateFrame || hasFP(MF)) {
+ AFI->setHasStackFrame(true);
+
+ // If LR is not spilled, but at least one of R4, R5, R6, or R7 is spilled,
+ // spill LR as well so we can fold BX_RET into the register restore (LDM).
+ if (!LRSpilled && CS1Spilled) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ AFI->setCSRegisterIsSpilled(ARM::LR);
+ NumGPRSpills++;
+ UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+ UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ ForceLRSpill = false;
+ ExtraCSSpill = true;
+ }
+
+ // Darwin ABI requires FP to point to the stack slot that contains the
+ // previous FP.
+ if (STI.isTargetDarwin() || hasFP(MF)) {
+ MF.getRegInfo().setPhysRegUsed(FramePtr);
+ NumGPRSpills++;
+ }
+
+ // If the stack and doubles are 8-byte aligned and we are spilling an odd
+ // number of GPRs, spill one extra callee-saved GPR so we won't have to pad
+ // between the integer and double callee-save areas.
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+ if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+ for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+ unsigned Reg = UnspilledCS1GPRs[i];
+ // Don't spill a high register if the function is Thumb
+ if (!AFI->isThumbFunction() || isLowRegister(Reg) || Reg == ARM::LR) {
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ AFI->setCSRegisterIsSpilled(Reg);
+ if (!isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ break;
+ }
+ }
+ } else if (!UnspilledCS2GPRs.empty() &&
+ !AFI->isThumbFunction()) {
+ unsigned Reg = UnspilledCS2GPRs.front();
+ MF.getRegInfo().setPhysRegUsed(Reg);
+ AFI->setCSRegisterIsSpilled(Reg);
+ if (!isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ }
+ }
+
+ // Estimate if we might need to scavenge a register at some point in order
+ // to materialize a stack offset. If so, either spill one additional
+ // callee-saved register or reserve a special spill slot to facilitate
+ // register scavenging.
+ if (RS && !ExtraCSSpill && !AFI->isThumbFunction()) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Size = estimateStackSize(MF, MFI);
+ unsigned Limit = (1 << 12) - 1;
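+ // Start from the ARM imm12 limit (4095 bytes); AddrMode3 below narrows it
+ // to 255 bytes and AddrMode5 to 1020.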
+ for (MachineFunction::iterator BB = MF.begin(),E = MF.end();BB != E; ++BB)
+ for (MachineBasicBlock::iterator I= BB->begin(); I != BB->end(); ++I) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i).isFI()) {
+ unsigned Opcode = I->getOpcode();
+ const TargetInstrDesc &Desc = TII.get(Opcode);
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ if (AddrMode == ARMII::AddrMode3) {
+ Limit = (1 << 8) - 1;
+ goto DoneEstimating;
+ } else if (AddrMode == ARMII::AddrMode5) {
+ unsigned ThisLimit = ((1 << 8) - 1) * 4;
+ if (ThisLimit < Limit)
+ Limit = ThisLimit;
+ }
+ }
+ }
+ DoneEstimating:
+ if (Size >= Limit) {
+ // If any non-reserved CS register isn't spilled, just spill one or two
+ // extra. That should take care of it!
+ unsigned NumExtras = TargetAlign / 4;
+ SmallVector<unsigned, 2> Extras;
+ while (NumExtras && !UnspilledCS1GPRs.empty()) {
+ unsigned Reg = UnspilledCS1GPRs.back();
+ UnspilledCS1GPRs.pop_back();
+ if (!isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ while (NumExtras && !UnspilledCS2GPRs.empty()) {
+ unsigned Reg = UnspilledCS2GPRs.back();
+ UnspilledCS2GPRs.pop_back();
+ if (!isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ if (Extras.size() && NumExtras == 0) {
+ for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+ MF.getRegInfo().setPhysRegUsed(Extras[i]);
+ AFI->setCSRegisterIsSpilled(Extras[i]);
+ }
+ } else {
+ // Reserve a slot closest to SP or frame pointer.
+ const TargetRegisterClass *RC = &ARM::GPRRegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment()));
+ }
+ }
+ }
+ }
+
+ if (ForceLRSpill) {
+ MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ AFI->setCSRegisterIsSpilled(ARM::LR);
+ AFI->setLRIsSpilledForFarJump(true);
+ }
+}
+
+/// Move the iterator past the next group of callee-save load / store ops for
+/// the particular spill area (1: integer area 1, 2: integer area 2,
+/// 3: fp area, 0: don't care).
+static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ int Opc, unsigned Area,
+ const ARMSubtarget &STI) {
+ while (MBBI != MBB.end() &&
+ MBBI->getOpcode() == Opc && MBBI->getOperand(1).isFI()) {
+ if (Area != 0) {
+ bool Done = false;
+ unsigned Category = 0;
+ switch (MBBI->getOperand(0).getReg()) {
+ case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7:
+ case ARM::LR:
+ Category = 1;
+ break;
+ case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
+ Category = STI.isTargetDarwin() ? 2 : 1;
+ break;
+ case ARM::D8: case ARM::D9: case ARM::D10: case ARM::D11:
+ case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+ Category = 3;
+ break;
+ default:
+ Done = true;
+ break;
+ }
+ if (Done || Category != Area)
+ break;
+ }
+
+ ++MBBI;
+ }
+}
+
+void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isThumb = AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+ if (isThumb) {
+ // Check if R3 is live in. It might have to be used as a scratch register.
+ for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(),
+ E = MF.getRegInfo().livein_end(); I != E; ++I) {
+ if (I->first == ARM::R3) {
+ AFI->setR3IsLiveIn(true);
+ break;
+ }
+ }
+
+ // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+ NumBytes = (NumBytes + 3) & ~3;
+ MFI->setStackSize(NumBytes);
+ }
+
+ // Determine the size of each callee-save spill area and record which frame
+ // index belongs to which area.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+
+ if (VARegSaveSize)
+ emitSPUpdate(MBB, MBBI, -VARegSaveSize, ARMCC::AL, 0, isThumb, TII,
+ *this, dl);
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl);
+ return;
+ }
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isTargetDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ if (!isThumb) {
+ // Build the new SUBri to adjust SP for integer callee-save spill area 1.
+ emitSPUpdate(MBB, MBBI, -GPRCS1Size, ARMCC::AL, 0, isThumb, TII, *this, dl);
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI);
+ } else if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ ++MBBI;
+ if (MBBI != MBB.end())
+ dl = MBBI->getDebugLoc();
+ }
+
+ // Darwin ABI requires FP to point to the stack slot that contains the
+ // previous FP.
+ if (STI.isTargetDarwin() || hasFP(MF)) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(isThumb ? ARM::tADDrSPi : ARM::ADDri),
+ FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0);
+ if (!isThumb) AddDefaultCC(AddDefaultPred(MIB));
+ }
+
+ if (!isThumb) {
+ // Build the new SUBri to adjust SP for integer callee-save spill area 2.
+ emitSPUpdate(MBB, MBBI, -GPRCS2Size, ARMCC::AL, 0, false, TII, *this, dl);
+
+ // Build the new SUBri to adjust SP for FP callee-save spill area.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI);
+ emitSPUpdate(MBB, MBBI, -DPRCSSize, ARMCC::AL, 0, false, TII, *this, dl);
+ }
+
+ // Determine starting offsets of spill areas.
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
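+ // For example, with NumBytes = 40, GPRCS1Size = 20, GPRCS2Size = 8 and
+ // DPRCSSize = 8: DPRCSOffset = 4, GPRCS2Offset = 12, GPRCS1Offset = 20.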
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+ NumBytes = DPRCSOffset;
+ if (NumBytes) {
+ // Insert it after all the callee-save spills.
+ if (!isThumb)
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI);
+ emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl);
+ }
+
+ if(STI.isTargetELF() && hasFP(MF)) {
+ MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
+ }
+
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+ return ((MI->getOpcode() == ARM::FLDD ||
+ MI->getOpcode() == ARM::LDR ||
+ MI->getOpcode() == ARM::tRestore) &&
+ MI->getOperand(1).isFI() &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
+}
+
+void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ assert((MBBI->getOpcode() == ARM::BX_RET ||
+ MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET) &&
+ "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isThumb = AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+
+ if (!AFI->hasStackFrame()) {
+ if (NumBytes != 0)
+ emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl);
+ } else {
+ // Unwind MBBI to point to first LDR / FLDD.
+ const unsigned *CSRegs = getCalleeSavedRegs();
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+ if (!isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ }
+
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+ if (isThumb) {
+ if (hasFP(MF)) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on the frame pointer only if the stack frame extends beyond
+ // the frame pointer stack slot, or the target is ELF and the function has FP.
+ if (NumBytes)
+ emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes,
+ TII, *this, dl);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP)
+ .addReg(FramePtr);
+ } else {
+ if (MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI &&
+ prior(MBBI)->getOpcode() == ARM::tPOP) {
+ MachineBasicBlock::iterator PMBBI = prior(MBBI);
+ emitSPUpdate(MBB, PMBBI, NumBytes, ARMCC::AL, 0, isThumb, TII,
+ *this, dl);
+ } else
+ emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII,
+ *this, dl);
+ }
+ } else {
+ // Darwin ABI requires FP to point to the stack slot that contains the
+ // previous FP.
+ if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on the frame pointer only if the stack frame extends beyond
+ // the frame pointer stack slot, or the target is ELF and the function has FP.
+ if (AFI->getGPRCalleeSavedArea2Size() ||
+ AFI->getDPRCalleeSavedAreaSize() ||
+ AFI->getDPRCalleeSavedAreaOffset()||
+ hasFP(MF)) {
+ if (NumBytes)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr)
+ .addImm(NumBytes)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ }
+ } else if (NumBytes) {
+ emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, false, TII, *this, dl);
+ }
+
+ // Move SP to start of integer callee save spill area 2.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI);
+ emitSPUpdate(MBB, MBBI, AFI->getDPRCalleeSavedAreaSize(), ARMCC::AL, 0,
+ false, TII, *this, dl);
+
+ // Move SP to start of integer callee save spill area 1.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI);
+ emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea2Size(), ARMCC::AL, 0,
+ false, TII, *this, dl);
+
+ // Move SP to SP upon entry to the function.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI);
+ emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea1Size(), ARMCC::AL, 0,
+ false, TII, *this, dl);
+ }
+ }
+
+ if (VARegSaveSize) {
+ if (isThumb)
+ // Epilogue for vararg functions: pop LR to R3 and branch off it.
+ // FIXME: Verify this is still ok when R3 is no longer being reserved.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3);
+
+ emitSPUpdate(MBB, MBBI, VARegSaveSize, ARMCC::AL, 0, isThumb, TII,
+ *this, dl);
+
+ if (isThumb) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3);
+ MBB.erase(MBBI);
+ }
+ }
+}
+
+unsigned ARMRegisterInfo::getRARegister() const {
+ return ARM::LR;
+}
+
+unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ if (STI.isTargetDarwin() || hasFP(MF))
+ return (STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11;
+ else
+ return ARM::SP;
+}
+
+unsigned ARMRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned ARMRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
new file mode 100644
index 000000000000..e1d9efbcabf7
--- /dev/null
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -0,0 +1,102 @@
+//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMREGISTERINFO_H
+#define ARMREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "ARMGenRegisterInfo.h.inc"
+
+namespace llvm {
+ class ARMSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+struct ARMRegisterInfo : public ARMGenRegisterInfo {
+ const TargetInstrInfo &TII;
+ const ARMSubtarget &STI;
+private:
+ /// FramePtr - ARM physical register used as frame ptr.
+ unsigned FramePtr;
+
+public:
+ ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
+
+ /// emitLoadConstPool - Emits a load from constpool to materialize the
+ /// specified immediate.
+ void emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Val,
+ unsigned Pred, unsigned PredReg,
+ const TargetInstrInfo *TII, bool isThumb,
+ DebugLoc dl) const;
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// ARM::LR, return the number that it corresponds to (e.g. 14).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// Same as previous getRegisterNumbering except it returns true in isSPVFP
+ /// if the register is a single precision VFP register.
+ static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ const TargetRegisterClass *getPointerRegClass() const;
+
+ /// Code Generation virtual methods...
+ const TargetRegisterClass *
+ getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const;
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ bool hasReservedCallFrame(MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+ bool isLowRegister(unsigned Reg) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
new file mode 100644
index 000000000000..e8daf7489ebe
--- /dev/null
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -0,0 +1,221 @@
+//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the ARM register file
+//===----------------------------------------------------------------------===//
+
+// Registers are identified with 4-bit ID numbers.
+class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
+ field bits<4> Num;
+ let Namespace = "ARM";
+ let SubRegs = subregs;
+}
+
+class ARMFReg<bits<5> num, string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "ARM";
+}
+
+// Integer registers
+def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : ARMReg< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : ARMReg< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>;
+def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : ARMReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>;
+def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>;
+def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>;
+def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>;
+
+// Float registers
+def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">;
+def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">;
+def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">;
+def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">;
+def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">;
+def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
+def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
+def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
+def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
+def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
+def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
+def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
+def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
+def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
+def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
+def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
+
+// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+def D0 : ARMReg< 0, "d0", [S0, S1]>;
+def D1 : ARMReg< 1, "d1", [S2, S3]>;
+def D2 : ARMReg< 2, "d2", [S4, S5]>;
+def D3 : ARMReg< 3, "d3", [S6, S7]>;
+def D4 : ARMReg< 4, "d4", [S8, S9]>;
+def D5 : ARMReg< 5, "d5", [S10, S11]>;
+def D6 : ARMReg< 6, "d6", [S12, S13]>;
+def D7 : ARMReg< 7, "d7", [S14, S15]>;
+def D8 : ARMReg< 8, "d8", [S16, S17]>;
+def D9 : ARMReg< 9, "d9", [S18, S19]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>;
+
+// Current Program Status Register.
+def CPSR : ARMReg<0, "cpsr">;
+
+// Register classes.
+//
+// pc == Program Counter
+// lr == Link Register
+// sp == Stack Pointer
+// r12 == ip (scratch)
+// r7 == Frame Pointer (thumb-style backtraces)
+// r11 == Frame Pointer (arm-style backtraces)
+// r10 == Stack Limit
+//
+def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
+ R7, R8, R9, R10, R12, R11,
+ LR, SP, PC]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ // FIXME: We are reserving r12 in case the PEI needs to use it to
+ // generate large stack offset. Make it available once we have register
+ // scavenging. Similarly r3 is reserved in Thumb mode for now.
+ let MethodBodies = [{
+ // FP is R11, R9 is available.
+ static const unsigned ARM_GPR_AO_1[] = {
+ ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10,
+ ARM::R11 };
+ // FP is R11, R9 is not available.
+ static const unsigned ARM_GPR_AO_2[] = {
+ ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R10,
+ ARM::R11 };
+ // FP is R7, R9 is available.
+ static const unsigned ARM_GPR_AO_3[] = {
+ ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6,
+ ARM::R8, ARM::R9, ARM::R10,ARM::R11,
+ ARM::R7 };
+ // FP is R7, R9 is not available.
+ static const unsigned ARM_GPR_AO_4[] = {
+ ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6,
+ ARM::R8, ARM::R10,ARM::R11,
+ ARM::R7 };
+
+ GPRClass::iterator
+ GPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.useThumbBacktraces()) {
+ if (Subtarget.isR9Reserved())
+ return ARM_GPR_AO_4;
+ else
+ return ARM_GPR_AO_3;
+ } else {
+ if (Subtarget.isR9Reserved())
+ return ARM_GPR_AO_2;
+ else
+ return ARM_GPR_AO_1;
+ }
+ }
+
+ GPRClass::iterator
+ GPRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ GPRClass::iterator I;
+
+ if (Subtarget.useThumbBacktraces()) {
+ if (Subtarget.isR9Reserved()) {
+ I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
+ } else {
+ I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned));
+ }
+ } else {
+ if (Subtarget.isR9Reserved()) {
+ I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
+ } else {
+ I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
+ }
+ }
+
+ // Mac OS X requires FP not to be clobbered for backtracing purpose.
+ return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ }
+ }];
+}
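+
+// Note (added commentary): the allocation orders above hand out the
+// caller-saved argument registers (r3-r0) plus r12 and lr first, then the
+// callee-saved registers, and keep the frame-pointer candidate (r11 or r7)
+// last. For example, a Darwin subtarget uses thumb-style backtraces and
+// reserves r9, so ARM_GPR_AO_4 is selected and allocation_order_end returns
+// I-1, keeping r7 out of the allocatable set entirely.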
+
+// Thumb registers are R0-R7 normally. Some instructions can still use
+// the general GPR register class above (MOV, e.g.)
+def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ // FIXME: We are reserving r3 in Thumb mode in case the PEI needs to use it
+ // to generate large stack offset. Make it available once we have register
+ // scavenging.
+ let MethodBodies = [{
+ static const unsigned THUMB_tGPR_AO[] = {
+ ARM::R2, ARM::R1, ARM::R0,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
+
+ // FP is R7, only low registers available.
+ tGPRClass::iterator
+ tGPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ return THUMB_tGPR_AO;
+ }
+
+ tGPRClass::iterator
+ tGPRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ tGPRClass::iterator I =
+ THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned));
+ // Mac OS X requires FP not to be clobbered for backtracing purpose.
+ return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ }
+ }];
+}
+
+def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
+ S23, S24, S25, S26, S27, S28, S29, S30, S31]>;
+
+// ARM requires only word alignment for doubles, though double-word alignment
+// gives better performance.
+def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8,
+ D9, D10, D11, D12, D13, D14, D15]>;
+
+// Condition code registers.
+def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h
new file mode 100644
index 000000000000..2cc295085b8b
--- /dev/null
+++ b/lib/Target/ARM/ARMRelocations.h
@@ -0,0 +1,56 @@
+//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMRELOCATIONS_H
+#define ARMRELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace ARM {
+ enum RelocationType {
+ // reloc_arm_absolute - Absolute relocation, just add the relocated value
+ // to the value already in memory.
+ reloc_arm_absolute,
+
+ // reloc_arm_relative - PC relative relocation, add the relocated value to
+ // the value already in memory, after we adjust it for where the PC is.
+ reloc_arm_relative,
+
+ // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose
+ // addresses are kept locally in a map.
+ reloc_arm_cp_entry,
+
+ // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset
+ // should be divided by 4.
+ reloc_arm_vfp_cp_entry,
+
+ // reloc_arm_machine_cp_entry - Relocation of a ARM machine constantpool
+ // entry.
+ reloc_arm_machine_cp_entry,
+
+ // reloc_arm_jt_base - PC relative relocation for jump tables whose
+ // addresses are kept locally in a map.
+ reloc_arm_jt_base,
+
+ // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base.
+ reloc_arm_pic_jt,
+
+ // reloc_arm_branch - Branch address relocation.
+ reloc_arm_branch
+ };
+ }
+}
+
+#endif
+
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
new file mode 100644
index 000000000000..ef78cd52d85a
--- /dev/null
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -0,0 +1,84 @@
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMSubtarget.h"
+#include "ARMGenSubtarget.inc"
+#include "llvm/Module.h"
+using namespace llvm;
+
+ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
+ bool isThumb)
+ : ARMArchVersion(V4T)
+ , ARMFPUType(None)
+ , IsThumb(isThumb)
+ , ThumbMode(Thumb1)
+ , UseThumbBacktraces(false)
+ , IsR9Reserved(false)
+ , stackAlignment(4)
+ , CPUString("generic")
+ , TargetType(isELF) // Default to ELF unless otherwise specified.
+ , TargetABI(ARM_ABI_APCS) {
+ // Determine default and user specified characteristics
+
+ // Parse features string.
+ CPUString = ParseSubtargetFeatures(FS, CPUString);
+
+ // Set the boolean corresponding to the current target triple, or the default
+ // if one cannot be determined, to true.
+ const std::string& TT = M.getTargetTriple();
+ unsigned Len = TT.length();
+ unsigned Idx = 0;
+
+ if (Len >= 5 && TT.substr(0, 4) == "armv")
+ Idx = 4;
+  else if (Len >= 6 && TT.substr(0, 5) == "thumb") {
+ IsThumb = true;
+ if (Len >= 7 && TT[5] == 'v')
+ Idx = 6;
+ }
+ if (Idx) {
+ unsigned SubVer = TT[Idx];
+ if (SubVer > '4' && SubVer <= '9') {
+ if (SubVer >= '7')
+ ARMArchVersion = V7A;
+ else if (SubVer == '6')
+ ARMArchVersion = V6;
+ else if (SubVer == '5') {
+ ARMArchVersion = V5T;
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
+ ARMArchVersion = V5TE;
+ }
+ }
+ }
+
+ if (Len >= 10) {
+ if (TT.find("-darwin") != std::string::npos)
+ // arm-darwin
+ TargetType = isDarwin;
+ } else if (TT.empty()) {
+#if defined(__APPLE__)
+ TargetType = isDarwin;
+#endif
+ }
+
+ if (TT.find("eabi") != std::string::npos)
+ TargetABI = ARM_ABI_AAPCS;
+
+ if (isAAPCS_ABI())
+ stackAlignment = 8;
+
+ if (isTargetDarwin()) {
+ UseThumbBacktraces = true;
+ IsR9Reserved = true;
+ }
+}
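+
+// Worked examples (illustrative, not part of the original source): with the
+// parsing above, "armv6-apple-darwin" yields ARMArchVersion == V6, an isDarwin
+// target with the APCS ABI, and UseThumbBacktraces / IsR9Reserved set, while
+// "thumbv5te-unknown-linux-gnueabi" yields a Thumb V5TE subtarget with the
+// AAPCS ABI and an 8-byte stackAlignment.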
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
new file mode 100644
index 000000000000..8b469cff77d8
--- /dev/null
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -0,0 +1,122 @@
+//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSUBTARGET_H
+#define ARMSUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include <string>
+
+namespace llvm {
+class Module;
+
+class ARMSubtarget : public TargetSubtarget {
+protected:
+ enum ARMArchEnum {
+ V4T, V5T, V5TE, V6, V7A
+ };
+
+ enum ARMFPEnum {
+ None, VFPv2, VFPv3, NEON
+ };
+
+ enum ThumbTypeEnum {
+ Thumb1,
+ Thumb2
+ };
+
+ /// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE,
+  /// V6, V7A.
+ ARMArchEnum ARMArchVersion;
+
+ /// ARMFPUType - Floating Point Unit type.
+ ARMFPEnum ARMFPUType;
+
+ /// IsThumb - True if we are in thumb mode, false if in ARM mode.
+ bool IsThumb;
+
+ /// ThumbMode - Indicates supported Thumb version.
+ ThumbTypeEnum ThumbMode;
+
+ /// UseThumbBacktraces - True if we use thumb style backtraces.
+ bool UseThumbBacktraces;
+
+  /// IsR9Reserved - True if R9 is not available as a general purpose register.
+ bool IsR9Reserved;
+
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// CPUString - String name of used CPU.
+ std::string CPUString;
+
+ public:
+ enum {
+ isELF, isDarwin
+ } TargetType;
+
+ enum {
+ ARM_ABI_APCS,
+ ARM_ABI_AAPCS // ARM EABI
+ } TargetABI;
+
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ ARMSubtarget(const Module &M, const std::string &FS, bool isThumb);
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const {
+ // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb.
+ // Change this once Thumb ldmia / stmia support is added.
+ return isThumb() ? 0 : 64;
+ }
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ bool hasV4TOps() const { return ARMArchVersion >= V4T; }
+ bool hasV5TOps() const { return ARMArchVersion >= V5T; }
+ bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
+ bool hasV6Ops() const { return ARMArchVersion >= V6; }
+ bool hasV7Ops() const { return ARMArchVersion >= V7A; }
+
+ bool hasVFP2() const { return ARMFPUType >= VFPv2; }
+ bool hasVFP3() const { return ARMFPUType >= VFPv3; }
+ bool hasNEON() const { return ARMFPUType >= NEON; }
+
+ bool isTargetDarwin() const { return TargetType == isDarwin; }
+ bool isTargetELF() const { return TargetType == isELF; }
+
+ bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
+ bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
+
+ bool isThumb() const { return IsThumb; }
+ bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
+
+ bool useThumbBacktraces() const { return UseThumbBacktraces; }
+ bool isR9Reserved() const { return IsR9Reserved; }
+
+ const std::string & getCPUString() const { return CPUString; }
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+};
+} // End llvm namespace
+
+#endif // ARMSUBTARGET_H
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp
new file mode 100644
index 000000000000..4107dccd2a69
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp
@@ -0,0 +1,291 @@
+//===-- ARMTargetAsmInfo.cpp - ARM asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the ARMTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetAsmInfo.h"
+#include "ARMTargetMachine.h"
+#include <cstring>
+#include <cctype>
+using namespace llvm;
+
+
+const char *const llvm::arm_asm_table[] = {
+ "{r0}", "r0",
+ "{r1}", "r1",
+ "{r2}", "r2",
+ "{r3}", "r3",
+ "{r4}", "r4",
+ "{r5}", "r5",
+ "{r6}", "r6",
+ "{r7}", "r7",
+ "{r8}", "r8",
+ "{r9}", "r9",
+ "{r10}", "r10",
+ "{r11}", "r11",
+ "{r12}", "r12",
+ "{r13}", "r13",
+ "{r14}", "r14",
+ "{lr}", "lr",
+ "{sp}", "sp",
+ "{ip}", "ip",
+ "{fp}", "fp",
+ "{sl}", "sl",
+ "{memory}", "memory",
+ "{cc}", "cc",
+ 0,0};
+
+ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM):
+ ARMTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+
+ GlobalPrefix = "_";
+ PrivateGlobalPrefix = "L";
+ LessPrivateGlobalPrefix = "l";
+ StringConstantPrefix = "\1LC";
+ BSSSection = 0; // no BSS section
+ ZeroDirective = "\t.space\t";
+ ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill
+ SetDirective = "\t.set\t";
+ WeakRefDirective = "\t.weak_reference\t";
+ WeakDefDirective = "\t.weak_definition ";
+ HiddenDirective = "\t.private_extern\t";
+ ProtectedDirective = NULL;
+ JumpTableDataSection = ".const";
+ CStringSection = "\t.cstring";
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+ NeedsIndirectEncoding = true;
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorsSection = ".constructor";
+ StaticDtorsSection = ".destructor";
+ } else {
+ StaticCtorsSection = ".mod_init_func";
+ StaticDtorsSection = ".mod_term_func";
+ }
+
+ // In non-PIC modes, emit a special label before jump tables so that the
+ // linker can perform more accurate dead code stripping.
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ // Emit a local label that is preserved until the linker runs.
+ JumpTableSpecialLabelPrefix = "l";
+ }
+
+ NeedsSet = true;
+ DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+ DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+ DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+ DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+ DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+ DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+ DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+ DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+ DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+ DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+ DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+}
+
+ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
+ ARMTargetAsmInfo<ELFTargetAsmInfo>(TM) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+
+ NeedsSet = false;
+ HasLEB128 = true;
+ AbsoluteDebugSectionOffsets = true;
+ CStringSection = ".rodata.str";
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+ DwarfRequiresFrameSection = false;
+ DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",%progbits";
+ DwarfInfoSection = "\t.section\t.debug_info,\"\",%progbits";
+ DwarfLineSection = "\t.section\t.debug_line,\"\",%progbits";
+ DwarfFrameSection = "\t.section\t.debug_frame,\"\",%progbits";
+ DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"\",%progbits";
+ DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"\",%progbits";
+ DwarfStrSection = "\t.section\t.debug_str,\"\",%progbits";
+ DwarfLocSection = "\t.section\t.debug_loc,\"\",%progbits";
+ DwarfARangesSection = "\t.section\t.debug_aranges,\"\",%progbits";
+ DwarfRangesSection = "\t.section\t.debug_ranges,\"\",%progbits";
+ DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits";
+
+ if (Subtarget->isAAPCS_ABI()) {
+ StaticCtorsSection = "\t.section .init_array,\"aw\",%init_array";
+ StaticDtorsSection = "\t.section .fini_array,\"aw\",%fini_array";
+ } else {
+ StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
+ StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
+ }
+}
+
+/// Count the number of comma-separated arguments.
+/// Do not try to detect errors.
+template <class BaseTAI>
+unsigned ARMTargetAsmInfo<BaseTAI>::countArguments(const char* p) const {
+ unsigned count = 0;
+ while (*p && isspace(*p) && *p != '\n')
+ p++;
+ count++;
+ while (*p && *p!='\n' &&
+ strncmp(p, BaseTAI::CommentString,
+ strlen(BaseTAI::CommentString))!=0) {
+ if (*p==',')
+ count++;
+ p++;
+ }
+ return count;
+}
+
+/// Count the length of a string enclosed in quote characters.
+/// Do not try to detect errors.
+template <class BaseTAI>
+unsigned ARMTargetAsmInfo<BaseTAI>::countString(const char* p) const {
+ unsigned count = 0;
+ while (*p && isspace(*p) && *p!='\n')
+ p++;
+ if (!*p || *p != '\"')
+ return count;
+ while (*++p && *p != '\"')
+ count++;
+ return count;
+}
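+
+// Worked examples (illustrative): countArguments(" 1, 2, 3") returns 3 (one
+// for the first operand plus one per comma), so a ".long 1, 2, 3" directive
+// seen by getInlineAsmLength below contributes 4 * 3 bytes, and
+// countString(" \"abc\"") returns 3, the number of characters inside the
+// quotes.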
+
+/// ARM-specific version of TargetAsmInfo::getInlineAsmLength.
+template <class BaseTAI>
+unsigned ARMTargetAsmInfo<BaseTAI>::getInlineAsmLength(const char *s) const {
+ // Make a lowercase-folded version of s for counting purposes.
+ char *q, *s_copy = (char *)malloc(strlen(s) + 1);
+ strcpy(s_copy, s);
+ for (q=s_copy; *q; q++)
+ *q = tolower(*q);
+ const char *Str = s_copy;
+
+ // Count the number of bytes in the asm.
+ bool atInsnStart = true;
+ bool inTextSection = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (atInsnStart) {
+ // Skip whitespace
+ while (*Str && isspace(*Str) && *Str != '\n')
+ Str++;
+ // Skip label
+ for (const char* p = Str; *p && !isspace(*p); p++)
+ if (*p == ':') {
+ Str = p+1;
+ while (*Str && isspace(*Str) && *Str != '\n')
+ Str++;
+ break;
+ }
+
+ if (*Str == 0) break;
+
+ // Ignore everything from comment char(s) to EOL
+ if (strncmp(Str, BaseTAI::CommentString,
+ strlen(BaseTAI::CommentString)) == 0)
+ atInsnStart = false;
+ // FIXME do something like the following for non-Darwin
+ else if (*Str == '.' && Subtarget->isTargetDarwin()) {
+ // Directive.
+ atInsnStart = false;
+
+ // Some change the section, but don't generate code.
+ if (strncmp(Str, ".literal4", strlen(".literal4"))==0 ||
+ strncmp(Str, ".literal8", strlen(".literal8"))==0 ||
+ strncmp(Str, ".const", strlen(".const"))==0 ||
+ strncmp(Str, ".constructor", strlen(".constructor"))==0 ||
+ strncmp(Str, ".cstring", strlen(".cstring"))==0 ||
+ strncmp(Str, ".data", strlen(".data"))==0 ||
+ strncmp(Str, ".destructor", strlen(".destructor"))==0 ||
+ strncmp(Str, ".fvmlib_init0", strlen(".fvmlib_init0"))==0 ||
+ strncmp(Str, ".fvmlib_init1", strlen(".fvmlib_init1"))==0 ||
+ strncmp(Str, ".mod_init_func", strlen(".mod_init_func"))==0 ||
+ strncmp(Str, ".mod_term_func", strlen(".mod_term_func"))==0 ||
+ strncmp(Str, ".picsymbol_stub", strlen(".picsymbol_stub"))==0 ||
+ strncmp(Str, ".symbol_stub", strlen(".symbol_stub"))==0 ||
+ strncmp(Str, ".static_data", strlen(".static_data"))==0 ||
+ strncmp(Str, ".section", strlen(".section"))==0 ||
+ strncmp(Str, ".lazy_symbol_pointer", strlen(".lazy_symbol_pointer"))==0 ||
+ strncmp(Str, ".non_lazy_symbol_pointer", strlen(".non_lazy_symbol_pointer"))==0 ||
+ strncmp(Str, ".dyld", strlen(".dyld"))==0 ||
+ strncmp(Str, ".const_data", strlen(".const_data"))==0 ||
+ strncmp(Str, ".objc", strlen(".objc"))==0 || //// many directives
+ strncmp(Str, ".static_const", strlen(".static_const"))==0)
+ inTextSection=false;
+ else if (strncmp(Str, ".text", strlen(".text"))==0)
+ inTextSection = true;
+ // Some can't really be handled without implementing significant pieces
+ // of an assembler. Others require dynamic adjustment of block sizes in
+ // AdjustBBOffsetsAfter; it's a big compile-time speed hit to check every
+ // instruction in there, and none of these are currently used in the kernel.
+ else if (strncmp(Str, ".macro", strlen(".macro"))==0 ||
+ strncmp(Str, ".if", strlen(".if"))==0 ||
+ strncmp(Str, ".align", strlen(".align"))==0 ||
+ strncmp(Str, ".fill", strlen(".fill"))==0 ||
+ strncmp(Str, ".space", strlen(".space"))==0 ||
+ strncmp(Str, ".zerofill", strlen(".zerofill"))==0 ||
+ strncmp(Str, ".p2align", strlen(".p2align"))==0 ||
+ strncmp(Str, ".p2alignw", strlen(".p2alignw"))==0 ||
+ strncmp(Str, ".p2alignl", strlen(".p2alignl"))==0 ||
+                 strncmp(Str, ".align32", strlen(".align32"))==0 ||
+ strncmp(Str, ".include", strlen(".include"))==0)
+ cerr << "Directive " << Str << " in asm may lead to invalid offsets for" <<
+ " constant pools (the assembler will tell you if this happens).\n";
+ // Some generate code, but this is only interesting in the text section.
+ else if (inTextSection) {
+ if (strncmp(Str, ".long", strlen(".long"))==0)
+ Length += 4*countArguments(Str+strlen(".long"));
+ else if (strncmp(Str, ".short", strlen(".short"))==0)
+ Length += 2*countArguments(Str+strlen(".short"));
+ else if (strncmp(Str, ".byte", strlen(".byte"))==0)
+ Length += 1*countArguments(Str+strlen(".byte"));
+ else if (strncmp(Str, ".single", strlen(".single"))==0)
+ Length += 4*countArguments(Str+strlen(".single"));
+ else if (strncmp(Str, ".double", strlen(".double"))==0)
+ Length += 8*countArguments(Str+strlen(".double"));
+ else if (strncmp(Str, ".quad", strlen(".quad"))==0)
+ Length += 16*countArguments(Str+strlen(".quad"));
+ else if (strncmp(Str, ".ascii", strlen(".ascii"))==0)
+ Length += countString(Str+strlen(".ascii"));
+ else if (strncmp(Str, ".asciz", strlen(".asciz"))==0)
+ Length += countString(Str+strlen(".asciz"))+1;
+ }
+ } else if (inTextSection) {
+ // An instruction
+ atInsnStart = false;
+ if (Subtarget->isThumb()) {
+ // BL and BLX <non-reg> are 4 bytes, all others 2.
+ if (strncmp(Str, "blx", strlen("blx"))==0) {
+ const char* p = Str+3;
+ while (*p && isspace(*p))
+ p++;
+ if (*p == 'r' || *p=='R')
+ Length += 2; // BLX reg
+ else
+ Length += 4; // BLX non-reg
+ } else if (strncmp(Str, "bl", strlen("bl"))==0)
+ Length += 4; // BL
+ else
+ Length += 2; // Thumb anything else
+ }
+ else
+ Length += 4; // ARM
+ }
+ }
+ if (*Str == '\n' || *Str == BaseTAI::SeparatorChar)
+ atInsnStart = true;
+ }
+ free(s_copy);
+ return Length;
+}
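+
+// Example (illustrative): on a Thumb subtarget the inline asm string
+// "bl _foo\n add r0, r1\n" is counted as 4 + 2 = 6 bytes, since BL (and BLX to
+// a non-register target) always gets 4 bytes while every other Thumb
+// instruction is assumed to be 2 bytes; on an ARM subtarget each instruction
+// counts as 4 bytes.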
+
+// Instantiate default implementation.
+TEMPLATE_INSTANTIATION(class ARMTargetAsmInfo<TargetAsmInfo>);
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h
new file mode 100644
index 000000000000..9e6f8568f76a
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetAsmInfo.h
@@ -0,0 +1,64 @@
+//=====-- ARMTargetAsmInfo.h - ARM asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMTARGETASMINFO_H
+#define ARMTARGETASMINFO_H
+
+#include "ARMTargetMachine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+#include "llvm/Target/DarwinTargetAsmInfo.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+ extern const char *const arm_asm_table[];
+
+ template <class BaseTAI>
+ struct ARMTargetAsmInfo : public BaseTAI {
+ explicit ARMTargetAsmInfo(const ARMTargetMachine &TM):
+ BaseTAI(TM) {
+ BaseTAI::AsmTransCBE = arm_asm_table;
+
+ BaseTAI::AlignmentIsInBytes = false;
+ BaseTAI::Data64bitsDirective = 0;
+ BaseTAI::CommentString = "@";
+ BaseTAI::ConstantPoolSection = "\t.text\n";
+ BaseTAI::COMMDirectiveTakesAlignment = false;
+ BaseTAI::InlineAsmStart = "@ InlineAsm Start";
+ BaseTAI::InlineAsmEnd = "@ InlineAsm End";
+ BaseTAI::LCOMMDirective = "\t.lcomm\t";
+ }
+
+ const ARMSubtarget *Subtarget;
+
+ virtual unsigned getInlineAsmLength(const char *Str) const;
+ unsigned countArguments(const char *p) const;
+ unsigned countString(const char *p) const;
+ };
+
+ typedef ARMTargetAsmInfo<TargetAsmInfo> ARMGenericTargetAsmInfo;
+
+ EXTERN_TEMPLATE_INSTANTIATION(class ARMTargetAsmInfo<TargetAsmInfo>);
+
+ struct ARMDarwinTargetAsmInfo : public ARMTargetAsmInfo<DarwinTargetAsmInfo> {
+ explicit ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM);
+ };
+
+ struct ARMELFTargetAsmInfo : public ARMTargetAsmInfo<ELFTargetAsmInfo> {
+ explicit ARMELFTargetAsmInfo(const ARMTargetMachine &TM);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
new file mode 100644
index 000000000000..1dc7d19aa105
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -0,0 +1,242 @@
+//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMTargetMachine.h"
+#include "ARMTargetAsmInfo.h"
+#include "ARMFrameInfo.h"
+#include "ARM.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
+ cl::desc("Disable load store optimization pass"));
+static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden,
+ cl::desc("Disable if-conversion pass"));
+
+/// ARMTargetMachineModule - Note that this is used on hosts that cannot link
+/// in a library unless there are references into the library. In particular,
+/// it seems that it is not possible to get things to work on Win32 without
+/// this. Though it is unused, do not remove it.
+extern "C" int ARMTargetMachineModule;
+int ARMTargetMachineModule = 0;
+
+// Register the target.
+static RegisterTarget<ARMTargetMachine> X("arm", "ARM");
+static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb");
+
+// No assembler printer by default
+ARMTargetMachine::AsmPrinterCtorFn ARMTargetMachine::AsmPrinterCtor = 0;
+
+/// ThumbTargetMachine - Create a Thumb architecture model.
+///
+unsigned ThumbTargetMachine::getJITMatchQuality() {
+#if defined(__thumb__)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned ThumbTargetMachine::getModuleMatchQuality(const Module &M) {
+ std::string TT = M.getTargetTriple();
+ // Match thumb-foo-bar, as well as things like thumbv5blah-*
+ if (TT.size() >= 6 &&
+ (TT.substr(0, 6) == "thumb-" || TT.substr(0, 6) == "thumbv"))
+ return 20;
+
+ // If the target triple is something non-thumb, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer32)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS)
+ : ARMTargetMachine(M, FS, true) {
+}
+
+/// TargetMachine ctor - Create an ARM architecture model.
+///
+ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS,
+ bool isThumb)
+ : Subtarget(M, FS, isThumb),
+ DataLayout(Subtarget.isAPCS_ABI() ?
+ // APCS ABI
+ (isThumb ?
+ std::string("e-p:32:32-f64:32:32-i64:32:32-"
+ "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
+ std::string("e-p:32:32-f64:32:32-i64:32:32")) :
+ // AAPCS ABI
+ (isThumb ?
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64"))),
+ InstrInfo(Subtarget),
+ FrameInfo(Subtarget),
+ JITInfo(),
+ TLInfo(*this) {
+ DefRelocModel = getRelocationModel();
+}
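+
+// Note (added commentary): the target data strings above use LLVM's layout
+// syntax: "e" is little-endian, "p:32:32" means 32-bit pointers with 32-bit
+// alignment, and "f64:32:32" versus "f64:64:64" (likewise for i64) captures
+// the APCS/AAPCS difference in double-word alignment; the extra i16/i8/i1 and
+// "a:0:32" entries in Thumb mode give small integer types and aggregates a
+// preferred 32-bit alignment.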
+
+unsigned ARMTargetMachine::getJITMatchQuality() {
+#if defined(__arm__)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
+ std::string TT = M.getTargetTriple();
+ // Match arm-foo-bar, as well as things like armv5blah-*
+ if (TT.size() >= 4 &&
+ (TT.substr(0, 4) == "arm-" || TT.substr(0, 4) == "armv"))
+ return 20;
+ // If the target triple is something non-arm, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer32)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
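+
+// Examples (illustrative): a triple such as "armv5-unknown-linux-gnu" scores
+// 20, an empty triple on a little-endian module with 32-bit pointers scores 10
+// (weak match), and any other non-empty triple scores 0 so a different target
+// is chosen.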
+
+
+const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
+ switch (Subtarget.TargetType) {
+ case ARMSubtarget::isDarwin:
+ return new ARMDarwinTargetAsmInfo(*this);
+ case ARMSubtarget::isELF:
+ return new ARMELFTargetAsmInfo(*this);
+ default:
+ return new ARMGenericTargetAsmInfo(*this);
+ }
+}
+
+
+// Pass Pipeline Configuration
+bool ARMTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createARMISelDag(*this));
+ return false;
+}
+
+bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
+ if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
+ PM.add(createARMLoadStoreOptimizationPass());
+
+ if (OptLevel != CodeGenOpt::None &&
+ !DisableIfConversion && !Subtarget.isThumb())
+ PM.add(createIfConverterPass());
+
+ PM.add(createARMConstantIslandPass());
+ return true;
+}
+
+bool ARMTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ // Output assembly language.
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+
+ return false;
+}
+
+
+bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE) {
+ // FIXME: Move this to TargetJITInfo!
+ if (DefRelocModel == Reloc::Default)
+ setRelocationModel(Reloc::Static);
+
+ // Machine code emitter pass for ARM.
+ PM.add(createARMCodeEmitterPass(*this, MCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE) {
+ // FIXME: Move this to TargetJITInfo!
+ if (DefRelocModel == Reloc::Default)
+ setRelocationModel(Reloc::Static);
+
+ // Machine code emitter pass for ARM.
+ PM.add(createARMJITCodeEmitterPass(*this, JCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE) {
+ // Machine code emitter pass for ARM.
+ PM.add(createARMCodeEmitterPass(*this, MCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE) {
+ // Machine code emitter pass for ARM.
+ PM.add(createARMJITCodeEmitterPass(*this, JCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
new file mode 100644
index 000000000000..916a8aa9e985
--- /dev/null
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -0,0 +1,104 @@
+//===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMTARGETMACHINE_H
+#define ARMTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMFrameInfo.h"
+#include "ARMJITInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+
+namespace llvm {
+
+class Module;
+
+class ARMTargetMachine : public LLVMTargetMachine {
+ ARMSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMInstrInfo InstrInfo;
+ ARMFrameInfo FrameInfo;
+ ARMJITInfo JITInfo;
+ ARMTargetLowering TLInfo;
+ Reloc::Model DefRelocModel; // Reloc model before it's overridden.
+
+protected:
+  // To avoid having the target depend on the asm printer libraries, the asm
+  // printer registers itself here by setting this constructor function pointer
+  // at startup time if it is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ ARMTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
+public:
+ ARMTargetMachine(const Module &M, const std::string &FS, bool isThumb = false);
+
+ virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const ARMFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
+ virtual const ARMRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual ARMTargetLowering *getTargetLowering() const {
+ return const_cast<ARMTargetLowering*>(&TLInfo);
+ }
+
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
+
+ static unsigned getModuleMatchQuality(const Module &M);
+ static unsigned getJITMatchQuality();
+
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &MCE);
+};
+
+/// ThumbTargetMachine - Thumb target machine.
+///
+class ThumbTargetMachine : public ARMTargetMachine {
+public:
+ ThumbTargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
new file mode 100644
index 000000000000..d908cf436313
--- /dev/null
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -0,0 +1,1117 @@
+//===-- ARMAsmPrinter.cpp - ARM LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format ARM assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "ARM.h"
+#include "ARMBuildAttrs.h"
+#include "ARMTargetMachine.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter {
+ DwarfWriter *DW;
+ MachineModuleInfo *MMI;
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// AFI - Keep a pointer to ARMFunctionInfo for the current
+ /// MachineFunction.
+ ARMFunctionInfo *AFI;
+
+ /// MCP - Keep a pointer to constantpool entries of the current
+ /// MachineFunction.
+ const MachineConstantPool *MCP;
+
+ /// We name each basic block in a Function with a unique number, so
+ /// that we can consistently refer to them later. This is cleared
+ /// at the beginning of each call to runOnMachineFunction().
+ ///
+ typedef std::map<const Value *, unsigned> ValueMapTy;
+ ValueMapTy NumberForBB;
+
+ /// GVNonLazyPtrs - Keeps the set of GlobalValues that require
+ /// non-lazy-pointers for indirect access.
+ StringSet<> GVNonLazyPtrs;
+
+ /// HiddenGVNonLazyPtrs - Keeps the set of GlobalValues with hidden
+ /// visibility that require non-lazy-pointers for indirect access.
+ StringSet<> HiddenGVNonLazyPtrs;
+
+ /// FnStubs - Keeps the set of external function GlobalAddresses that the
+ /// asm printer should generate stubs for.
+ StringSet<> FnStubs;
+
+ /// True if asm printer is printing a series of CONSTPOOL_ENTRY.
+ bool InCPMode;
+ public:
+ explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(NULL), AFI(NULL), MCP(NULL),
+ InCPMode(false) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printSOImmOperand(const MachineInstr *MI, int opNum);
+ void printSOImm2PartOperand(const MachineInstr *MI, int opNum);
+ void printSORegOperand(const MachineInstr *MI, int opNum);
+ void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
+ void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
+ void printAddrMode3Operand(const MachineInstr *MI, int OpNo);
+ void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNo);
+ void printAddrMode4Operand(const MachineInstr *MI, int OpNo,
+ const char *Modifier = 0);
+ void printAddrMode5Operand(const MachineInstr *MI, int OpNo,
+ const char *Modifier = 0);
+ void printAddrModePCOperand(const MachineInstr *MI, int OpNo,
+ const char *Modifier = 0);
+ void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo,
+ unsigned Scale);
+ void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo);
+ void printPredicateOperand(const MachineInstr *MI, int opNum);
+ void printSBitModifierOperand(const MachineInstr *MI, int opNum);
+ void printPCLabel(const MachineInstr *MI, int opNum);
+ void printRegisterList(const MachineInstr *MI, int opNum);
+ void printCPInstOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier);
+ void printJTBlockOperand(const MachineInstr *MI, int opNum);
+
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode);
+
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ bool printInstruction(const MachineInstr *MI); // autogenerated.
+ void printMachineInstruction(const MachineInstr *MI);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ /// EmitMachineConstantPoolValue - Print a machine constantpool value to
+ /// the .s file.
+ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ printDataDirective(MCPV->getType());
+
+ ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+ GlobalValue *GV = ACPV->getGV();
+ std::string Name = GV ? Mang->getValueName(GV) : TAI->getGlobalPrefix();
+ if (!GV)
+ Name += ACPV->getSymbol();
+ if (ACPV->isNonLazyPointer()) {
+ if (GV->hasHiddenVisibility())
+ HiddenGVNonLazyPtrs.insert(Name);
+ else
+ GVNonLazyPtrs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ } else if (ACPV->isStub()) {
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ } else
+ O << Name;
+ if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")";
+ if (ACPV->getPCAdjustment() != 0) {
+ O << "-(" << TAI->getPrivateGlobalPrefix() << "PC"
+ << utostr(ACPV->getLabelId())
+ << "+" << (unsigned)ACPV->getPCAdjustment();
+ if (ACPV->mustAddCurrentAddress())
+ O << "-.";
+ O << ")";
+ }
+ O << "\n";
+
+    // If the constant pool value is an extern weak symbol, remember to emit
+ // the weak reference.
+ if (GV && GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AsmPrinter::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ }
+ };
+} // end of anonymous namespace
+
+#include "ARMGenAsmWriter.inc"
+
+/// runOnMachineFunction - This uses the printInstruction()
+/// method to print assembly for each instruction.
+///
+bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ MCP = MF.getConstantPool();
+
+ SetupMachineFunction(MF);
+ O << "\n";
+
+ // NOTE: we don't print out constant pools here, they are handled as
+ // instructions.
+
+ O << "\n";
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::PrivateLinkage:
+ case Function::InternalLinkage:
+ SwitchToTextSection("\t.text", F);
+ break;
+ case Function::ExternalLinkage:
+ SwitchToTextSection("\t.text", F);
+ O << "\t.globl\t" << CurrentFnName << "\n";
+ break;
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ if (Subtarget->isTargetDarwin()) {
+ SwitchToTextSection(
+ ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", F);
+ O << "\t.globl\t" << CurrentFnName << "\n";
+ O << "\t.weak_definition\t" << CurrentFnName << "\n";
+ } else {
+ O << TAI->getWeakRefDirective() << CurrentFnName << "\n";
+ }
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ if (AFI->isThumbFunction()) {
+ EmitAlignment(1, F, AFI->getAlign());
+ O << "\t.code\t16\n";
+ O << "\t.thumb_func";
+ if (Subtarget->isTargetDarwin())
+ O << "\t" << CurrentFnName;
+ O << "\n";
+ InCPMode = false;
+ } else
+ EmitAlignment(2, F);
+
+ O << CurrentFnName << ":\n";
+ // Emit pre-function debug information.
+ DW->BeginFunction(&MF);
+
+ if (Subtarget->isTargetDarwin()) {
+ // If the function is empty, then we need to emit *something*. Otherwise,
+ // the function's label might be associated with something that it wasn't
+ // meant to be associated with. We emit a noop in this situation.
+ MachineFunction::iterator I = MF.begin();
+
+ if (++I == MF.end() && MF.front().empty())
+ O << "\tnop\n";
+ }
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true, VerboseAsm);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n";
+
+ // Emit post-function debug information.
+ DW->EndFunction(&MF);
+
+ O.flush();
+
+ return false;
+}
+
+void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ else
+ assert(0 && "not implemented");
+ break;
+ case MachineOperand::MO_Immediate: {
+ if (!Modifier || strcmp(Modifier, "no_hash") != 0)
+ O << "#";
+
+ O << MO.getImm();
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+ bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage());
+ if (isExt && isCallOp && Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() != Reloc::Static) {
+ printSuffixedName(Name, "$stub");
+ FnStubs.insert(Name);
+ } else
+ O << Name;
+
+ printOffset(MO.getOffset());
+
+ if (isCallOp && Subtarget->isTargetELF() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ O << "(PLT)";
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ std::string Name(TAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ if (isCallOp && Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() != Reloc::Static) {
+ printSuffixedName(Name, "$stub");
+ FnStubs.insert(Name);
+ } else
+ O << Name;
+ if (isCallOp && Subtarget->isTargetELF() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ O << "(PLT)";
+ break;
+ }
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ default:
+ O << "<unknown operand type>"; abort (); break;
+ }
+}
+
+static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm,
+ const TargetAsmInfo *TAI) {
+ assert(V < (1 << 12) && "Not a valid so_imm value!");
+ unsigned Imm = ARM_AM::getSOImmValImm(V);
+ unsigned Rot = ARM_AM::getSOImmValRot(V);
+
+ // Print low-level immediate formation info, per
+ // A5.1.3: "Data-processing operands - Immediate".
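+ // An so_imm is an 8-bit value rotated right by an even amount. When the
+ // rotation is non-zero, print the raw (immediate, rotation) pair and, in
+ // verbose mode, the decoded value as an assembler comment.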
+ if (Rot) {
+ O << "#" << Imm << ", " << Rot;
+ // Pretty printed version.
+ if (VerboseAsm)
+ O << ' ' << TAI->getCommentString()
+ << ' ' << (int)ARM_AM::rotr32(Imm, Rot);
+ } else {
+ O << "#" << Imm;
+ }
+}
+
+/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit
+/// immediate in bits 0-7.
+void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isImm() && "Not a valid so_imm value!");
+ printSOImm(O, MO.getImm(), VerboseAsm, TAI);
+}
+
+/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov'
+/// followed by an 'orr' to materialize.
+void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isImm() && "Not a valid so_imm value!");
+ unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO.getImm());
+ unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO.getImm());
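+ // Print the first so_imm piece here (the enclosing asm string supplies the
+ // leading 'mov' and its destination), then emit a full 'orr' instruction
+ // that folds in the second piece.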
+ printSOImm(O, ARM_AM::getSOImmVal(V1), VerboseAsm, TAI);
+ O << "\n\torr";
+ printPredicateOperand(MI, 2);
+ O << " ";
+ printOperand(MI, 0);
+ O << ", ";
+ printOperand(MI, 0);
+ O << ", ";
+ printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI);
+}
+
+// so_reg is a 4-operand unit corresponding to register forms of the A5.1
+// "Addressing Mode 1 - Data-processing operands" forms. This includes:
+// REG 0 0 - e.g. R5
+// REG REG 0,SH_OPC - e.g. R5, ROR R3
+// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
+void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
+ O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+
+ // Print the shift opc.
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()))
+ << " ";
+
+ if (MO2.getReg()) {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
+ O << TM.getRegisterInfo()->get(MO2.getReg()).AsmName;
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ } else {
+ O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ }
+}
+
+void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
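+ // Addressing mode 2 is one of [Rn], [Rn, #+/-imm] or [Rn, +/-Rm, <shift> #imm];
+ // MO3 packs the offset (or shift amount), the shift opcode and the add/sub bit.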
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op);
+ return;
+ }
+
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+
+ if (!MO2.getReg()) {
+ if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
+ O << ", #"
+ << (char)ARM_AM::getAM2Op(MO3.getImm())
+ << ARM_AM::getAM2Offset(MO3.getImm());
+ O << "]";
+ return;
+ }
+
+ O << ", "
+ << (char)ARM_AM::getAM2Op(MO3.getImm())
+ << TM.getRegisterInfo()->get(MO2.getReg()).AsmName;
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
+ << " #" << ShImm;
+ O << "]";
+}
+
+void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+ if (!MO1.getReg()) {
+ unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
+ assert(ImmOffs && "Malformed indexed load / store!");
+ O << "#"
+ << (char)ARM_AM::getAM2Op(MO2.getImm())
+ << ImmOffs;
+ return;
+ }
+
+ O << (char)ARM_AM::getAM2Op(MO2.getImm())
+ << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm()))
+ << " #" << ShImm;
+}
+
+void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+
+ if (MO2.getReg()) {
+ O << ", "
+ << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << TM.getRegisterInfo()->get(MO2.getReg()).AsmName
+ << "]";
+ return;
+ }
+
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
+ O << ", #"
+ << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << ImmOffs;
+ O << "]";
+}
+
+void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+ if (MO1.getReg()) {
+ O << (char)ARM_AM::getAM3Op(MO2.getImm())
+ << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
+ assert(ImmOffs && "Malformed indexed load / store!");
+ O << "#"
+ << (char)ARM_AM::getAM3Op(MO2.getImm())
+ << ImmOffs;
+}
+
+void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
+ const char *Modifier) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
+ if (Modifier && strcmp(Modifier, "submode") == 0) {
+ if (MO1.getReg() == ARM::SP) {
+ bool isLDM = (MI->getOpcode() == ARM::LDM ||
+ MI->getOpcode() == ARM::LDM_RET);
+ O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
+ } else
+ O << ARM_AM::getAMSubModeStr(Mode);
+ } else {
+ printOperand(MI, Op);
+ if (ARM_AM::getAM4WBFlag(MO2.getImm()))
+ O << "!";
+ }
+}
+
+void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
+ const char *Modifier) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op);
+ return;
+ }
+
+ assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
+
+ if (Modifier && strcmp(Modifier, "submode") == 0) {
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
+ if (MO1.getReg() == ARM::SP) {
+ bool isFLDM = (MI->getOpcode() == ARM::FLDMD ||
+ MI->getOpcode() == ARM::FLDMS);
+ O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM);
+ } else
+ O << ARM_AM::getAMSubModeStr(Mode);
+ return;
+ } else if (Modifier && strcmp(Modifier, "base") == 0) {
+ // Used for FSTM{D|S} and FLDM{D|S} operations.
+ O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ if (ARM_AM::getAM5WBFlag(MO2.getImm()))
+ O << "!";
+ return;
+ }
+
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
+ O << ", #"
+ << (char)ARM_AM::getAM5Op(MO2.getImm())
+ << ImmOffs*4;
+ }
+ O << "]";
+}
+
+void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
+ const char *Modifier) {
+ if (Modifier && strcmp(Modifier, "label") == 0) {
+ printPCLabel(MI, Op+1);
+ return;
+ }
+
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
+ O << "[pc, +" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).AsmName << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
+ unsigned Scale) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op);
+ return;
+ }
+
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ if (MO3.getReg())
+ O << ", " << TM.getRegisterInfo()->get(MO3.getReg()).AsmName;
+ else if (unsigned ImmOffs = MO2.getImm()) {
+ O << ", #" << ImmOffs;
+ if (Scale > 1)
+ O << " * " << Scale;
+ }
+ O << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeS1Operand(const MachineInstr *MI, int Op) {
+ printThumbAddrModeRI5Operand(MI, Op, 1);
+}
+void
+ARMAsmPrinter::printThumbAddrModeS2Operand(const MachineInstr *MI, int Op) {
+ printThumbAddrModeRI5Operand(MI, Op, 2);
+}
+void
+ARMAsmPrinter::printThumbAddrModeS4Operand(const MachineInstr *MI, int Op) {
+ printThumbAddrModeRI5Operand(MI, Op, 4);
+}
+
+void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+ if (unsigned ImmOffs = MO2.getImm())
+ O << ", #" << ImmOffs << " * 4";
+ O << "]";
+}
+
+void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int opNum) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(opNum).getImm();
+ if (CC != ARMCC::AL)
+ O << ARMCondCodeToString(CC);
+}
+
+void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int opNum){
+ unsigned Reg = MI->getOperand(opNum).getReg();
+ if (Reg) {
+ assert(Reg == ARM::CPSR && "Expect ARM CPSR register!");
+ O << 's';
+ }
+}
+
+void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int opNum) {
+ int Id = (int)MI->getOperand(opNum).getImm();
+ O << TAI->getPrivateGlobalPrefix() << "PC" << Id;
+}
+
+void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int opNum) {
+ O << "{";
+ for (unsigned i = opNum, e = MI->getNumOperands(); i != e; ++i) {
+ printOperand(MI, i);
+ if (i != e-1) O << ", ";
+ }
+ O << "}";
+}
+
+void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo,
+ const char *Modifier) {
+ assert(Modifier && "This operand only works with a modifier!");
+ // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the
+ // data itself.
+ if (!strcmp(Modifier, "label")) {
+ unsigned ID = MI->getOperand(OpNo).getImm();
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << ID << ":\n";
+ } else {
+ assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE");
+ unsigned CPI = MI->getOperand(OpNo).getIndex();
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
+
+ if (MCPE.isMachineConstantPoolEntry()) {
+ EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ } else {
+ EmitGlobalConstant(MCPE.Val.ConstVal);
+ // remember to emit the weak reference
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(MCPE.Val.ConstVal))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ }
+ }
+}
+
+void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id
+ unsigned JTI = MO1.getIndex();
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << JTI << '_' << MO2.getImm() << ":\n";
+
+ const char *JTEntryDirective = TAI->getJumpTableDirective();
+ if (!JTEntryDirective)
+ JTEntryDirective = TAI->getData32bitsDirective();
+
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ bool UseSet= TAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_;
+ std::set<MachineBasicBlock*> JTSets;
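+ // In PIC mode each jump table entry is emitted as the difference between the
+ // destination block and the table (via .set when the directive is available),
+ // keeping the table position-independent.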
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ if (UseSet && JTSets.insert(MBB).second)
+ printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB);
+
+ O << JTEntryDirective << ' ';
+ if (UseSet)
+ O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << '_' << JTI << '_' << MO2.getImm()
+ << "_set_" << MBB->getNumber();
+ else if (TM.getRelocationModel() == Reloc::PIC_) {
+ printBasicBlockLabel(MBB, false, false, false);
+ // If the arch uses custom Jump Table directives, don't calc relative to JT
+ if (!TAI->getJumpTableDirective())
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm();
+ } else
+ printBasicBlockLabel(MBB, false, false, false);
+ if (i != e-1)
+ O << '\n';
+ }
+}
+
+
+bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode){
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'a': // Don't print "#" before a global var name or constant.
+ case 'c': // Don't print "$" before a global var name or constant.
+ printOperand(MI, OpNo, "no_hash");
+ return false;
+ case 'P': // Print a VFP double precision register.
+ printOperand(MI, OpNo);
+ return false;
+ case 'Q':
+ if (TM.getTargetData()->isLittleEndian())
+ break;
+ // Fallthrough
+ case 'R':
+ if (TM.getTargetData()->isBigEndian())
+ break;
+ // Fallthrough
+ case 'H': // Write second word of DI / DF reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ printAddrMode2Operand(MI, OpNo);
+ return false;
+}
+
+void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ int Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::CONSTPOOL_ENTRY:
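+ // Inline constant pool entries in Thumb functions need 4-byte alignment;
+ // emit it once at the start of each run of entries.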
+ if (!InCPMode && AFI->isThumbFunction()) {
+ EmitAlignment(2);
+ InCPMode = true;
+ }
+ break;
+ default: {
+ if (InCPMode && AFI->isThumbFunction())
+ InCPMode = false;
+ }}
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+}
+
+bool ARMAsmPrinter::doInitialization(Module &M) {
+
+ bool Result = AsmPrinter::doInitialization(M);
+
+ // Emit initial debug information.
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMI);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ assert(DW && "Dwarf Writer is not available");
+ DW->BeginModule(&M, MMI, O, this, TAI);
+
+ // Darwin wants symbols to be quoted if they have complex names.
+ if (Subtarget->isTargetDarwin())
+ Mang->setUseQuotes(true);
+
+ // Emit ARM Build Attributes
+ if (Subtarget->isTargetELF()) {
+ // CPU Type
+ std::string CPUString = Subtarget->getCPUString();
+ if (CPUString != "generic")
+ O << "\t.cpu " << CPUString << '\n';
+
+ // FIXME: Emit FPU type
+ if (Subtarget->hasVFP2())
+ O << "\t.eabi_attribute " << ARMBuildAttrs::VFP_arch << ", 2\n";
+
+ // Signal various FP modes.
+ if (!UnsafeFPMath)
+ O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_denormal << ", 1\n"
+ << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_exceptions << ", 1\n";
+
+ if (FiniteOnlyFPMath())
+ O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_number_model << ", 1\n";
+ else
+ O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_number_model << ", 3\n";
+
+ // 8-bytes alignment stuff.
+ O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_needed << ", 1\n"
+ << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_preserved << ", 1\n";
+
+ // FIXME: Should we signal R9 usage?
+ }
+
+ return Result;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \\n or \\0.
+static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+ for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+ Name != E; ++Name)
+ if (isprint(*Name))
+ OS << *Name;
+}
+
+void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer()) // External globals require no code
+ return;
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+
+ if (EmitSpecialLLVMGlobal(GVar)) {
+ if (Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() == Reloc::Static) {
+ if (GVar->getName() == "llvm.global_ctors")
+ O << ".reference .constructors_used\n";
+ else if (GVar->getName() == "llvm.global_dtors")
+ O << ".reference .destructors_used\n";
+ }
+ return;
+ }
+
+ std::string name = Mang->getValueName(GVar);
+ Constant *C = GVar->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+ bool isDarwin = Subtarget->isTargetDarwin();
+
+ printVisibility(name, GVar->getVisibility());
+
+ if (Subtarget->isTargetELF())
+ O << "\t.type " << name << ",%object\n";
+
+ if (C->isNullValue() && !GVar->hasSection() && !GVar->isThreadLocal() &&
+ !(isDarwin &&
+ TAI->SectionKindForGlobal(GVar) == SectionKind::RODataMergeStr)) {
+ // FIXME: This seems to be pretty darwin-specific
+
+ if (GVar->hasExternalLinkage()) {
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+ if (const char *Directive = TAI->getZeroFillDirective()) {
+ O << "\t.globl\t" << name << "\n";
+ O << Directive << "__DATA, __common, " << name << ", "
+ << Size << ", " << Align << "\n";
+ return;
+ }
+ }
+
+ if (GVar->hasLocalLinkage() || GVar->isWeakForLinker()) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (isDarwin) {
+ if (GVar->hasLocalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << "," << Size
+ << ',' << Align;
+ } else if (GVar->hasCommonLinkage()) {
+ O << TAI->getCOMMDirective() << name << "," << Size
+ << ',' << Align;
+ } else {
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+ O << "\t.globl " << name << '\n'
+ << TAI->getWeakDefDirective() << name << '\n';
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O << "\t\t\t\t" << TAI->getCommentString() << ' ';
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << '\n';
+ EmitGlobalConstant(C);
+ return;
+ }
+ } else if (TAI->getLCOMMDirective() != NULL) {
+ if (GVar->hasLocalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << "," << Size;
+ } else {
+ O << TAI->getCOMMDirective() << name << "," << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ } else {
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+ if (GVar->hasLocalLinkage())
+ O << "\t.local\t" << name << "\n";
+ O << TAI->getCOMMDirective() << name << "," << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << "," << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ if (VerboseAsm) {
+ O << "\t\t" << TAI->getCommentString() << " ";
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << "\n";
+ return;
+ }
+ }
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+ switch (GVar->getLinkage()) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ if (isDarwin) {
+ O << "\t.globl " << name << "\n"
+ << "\t.weak_definition " << name << "\n";
+ } else {
+ O << "\t.weak " << name << "\n";
+ }
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ O << "\t.globl " << name << "\n";
+ // FALL THROUGH
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ default:
+ assert(0 && "Unknown linkage type!");
+ break;
+ }
+
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O << "\t\t\t\t" << TAI->getCommentString() << " ";
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << "\n";
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size " << name << ", " << Size << "\n";
+
+ // If the initializer is an extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+}
+
+
+bool ARMAsmPrinter::doFinalization(Module &M) {
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+ if (Subtarget->isTargetDarwin()) {
+ SwitchToDataSection("");
+
+ // Output stubs for dynamically-linked functions
+ for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i) {
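+ // A PIC stub needs an extra 'add ip, pc, ip' and so is 16 bytes; a static
+ // stub is 12 bytes. The stub size is encoded in the section attributes.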
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ SwitchToTextSection(".section __TEXT,__picsymbolstub4,symbol_stubs,"
+ "none,16", 0);
+ else
+ SwitchToTextSection(".section __TEXT,__symbol_stub4,symbol_stubs,"
+ "none,12", 0);
+
+ EmitAlignment(2);
+ O << "\t.code\t32\n";
+
+ const char *p = i->getKeyData();
+ printSuffixedName(p, "$stub");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << "\n";
+ O << "\tldr ip, ";
+ printSuffixedName(p, "$slp");
+ O << "\n";
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ printSuffixedName(p, "$scv");
+ O << ":\n";
+ O << "\tadd ip, pc, ip\n";
+ }
+ O << "\tldr pc, [ip, #0]\n";
+ printSuffixedName(p, "$slp");
+ O << ":\n";
+ O << "\t.long\t";
+ printSuffixedName(p, "$lazy_ptr");
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ O << "-(";
+ printSuffixedName(p, "$scv");
+ O << "+8)\n";
+ } else
+ O << "\n";
+ SwitchToDataSection(".lazy_symbol_pointer", 0);
+ printSuffixedName(p, "$lazy_ptr");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << "\n";
+ O << "\t.long\tdyld_stub_binding_helper\n";
+ }
+ O << "\n";
+
+ // Output non-lazy-pointers for external and common global variables.
+ if (!GVNonLazyPtrs.empty()) {
+ SwitchToDataSection("\t.non_lazy_symbol_pointer", 0);
+ for (StringSet<>::iterator i = GVNonLazyPtrs.begin(),
+ e = GVNonLazyPtrs.end(); i != e; ++i) {
+ const char *p = i->getKeyData();
+ printSuffixedName(p, "$non_lazy_ptr");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << "\n";
+ O << "\t.long\t0\n";
+ }
+ }
+
+ if (!HiddenGVNonLazyPtrs.empty()) {
+ SwitchToSection(TAI->getDataSection());
+ for (StringSet<>::iterator i = HiddenGVNonLazyPtrs.begin(),
+ e = HiddenGVNonLazyPtrs.end(); i != e; ++i) {
+ const char *p = i->getKeyData();
+ EmitAlignment(2);
+ printSuffixedName(p, "$non_lazy_ptr");
+ O << ":\n";
+ O << "\t.long " << p << "\n";
+ }
+ }
+
+
+ // Emit final debug information.
+ DW->EndModule();
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ O << "\t.subsections_via_symbols\n";
+ } else {
+ // Emit final debug information for ELF.
+ DW->EndModule();
+ }
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// createARMCodePrinterPass - Returns a pass that prints the ARM
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o,
+ ARMTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+namespace {
+ static struct Register {
+ Register() {
+ ARMTargetMachine::registerAsmPrinter(createARMCodePrinterPass);
+ }
+ } Registrator;
+}
diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..524a748665be
--- /dev/null
+++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,9 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_partially_linked_object(LLVMARMAsmPrinter
+ ARMAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMARMCodeGen n)
+
+add_dependencies(LLVMARMAsmPrinter ${n})
diff --git a/lib/Target/ARM/AsmPrinter/Makefile b/lib/Target/ARM/AsmPrinter/Makefile
new file mode 100644
index 000000000000..ce36cec47b6e
--- /dev/null
+++ b/lib/Target/ARM/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMAsmPrinter
+
+# Hack: we need to include the 'main' ARM target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
new file mode 100644
index 000000000000..2ac40f535497
--- /dev/null
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -0,0 +1,27 @@
+set(LLVM_TARGET_DEFINITIONS ARM.td)
+
+tablegen(ARMGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(ARMGenRegisterNames.inc -gen-register-enums)
+tablegen(ARMGenRegisterInfo.inc -gen-register-desc)
+tablegen(ARMGenInstrNames.inc -gen-instr-enums)
+tablegen(ARMGenInstrInfo.inc -gen-instr-desc)
+tablegen(ARMGenCodeEmitter.inc -gen-emitter)
+tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
+tablegen(ARMGenDAGISel.inc -gen-dag-isel)
+tablegen(ARMGenCallingConv.inc -gen-callingconv)
+tablegen(ARMGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(ARMCodeGen
+ ARMCodeEmitter.cpp
+ ARMConstantIslandPass.cpp
+ ARMConstantPoolValue.cpp
+ ARMInstrInfo.cpp
+ ARMISelDAGToDAG.cpp
+ ARMISelLowering.cpp
+ ARMJITInfo.cpp
+ ARMLoadStoreOptimizer.cpp
+ ARMRegisterInfo.cpp
+ ARMSubtarget.cpp
+ ARMTargetAsmInfo.cpp
+ ARMTargetMachine.cpp
+ )
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
new file mode 100644
index 000000000000..9a3b9be5b345
--- /dev/null
+++ b/lib/Target/ARM/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMARMCodeGen
+TARGET = ARM
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
+ ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
+ ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
+ ARMGenDAGISel.inc ARMGenSubtarget.inc \
+ ARMGenCodeEmitter.inc ARMGenCallingConv.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
new file mode 100644
index 000000000000..4d3200b445c1
--- /dev/null
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -0,0 +1,228 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the ARM backend (Thumb specific).
+//===---------------------------------------------------------------------===//
+
+* Add support for compiling functions in both ARM and Thumb mode, then taking
+ the smallest.
+
+* Add support for compiling individual basic blocks in thumb mode, when in a
+ larger ARM function. This can be used for presumed cold code, like paths
+ to abort (failure path of asserts), EH handling code, etc.
+
+* Thumb doesn't have normal pre/post increment addressing modes, but you can
+ load/store 32-bit integers with pre/postinc by using load/store multiple
+ instrs with a single register.
+
+* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add
+ and cmp instructions can use high registers. Also, we can use them as
+ temporaries to spill values into.
+
+* In thumb mode, short, byte, and bool preferred alignments are currently set
+ to 4 to accommodate an ISA restriction (i.e. for add sp, #imm, the immediate
+ must be a multiple of 4).
+
+//===---------------------------------------------------------------------===//
+
+Potential jumptable improvements:
+
+* If we know function size is less than (1 << 16) * 2 bytes, we can use 16-bit
+ jumptable entries (e.g. (L1 - L2) >> 1). Or even smaller entries if the
+ function is even smaller. This also applies to ARM.
+
+* Thumb jumptable codegen could be improved given some help from the assembler.
+ This is what we generate right now:
+
+ .set PCRELV0, (LJTI1_0_0-(LPCRELL0+4))
+LPCRELL0:
+ mov r1, #PCRELV0
+ add r1, pc
+ ldr r0, [r0, r1]
+ cpy pc, r0
+ .align 2
+LJTI1_0_0:
+ .long LBB1_3
+ ...
+
+Note there is another pc relative add that we can take advantage of.
+ add r1, pc, #imm_8 * 4
+
+We should be able to generate:
+
+LPCRELL0:
+ add r1, LJTI1_0_0
+ ldr r0, [r0, r1]
+ cpy pc, r0
+ .align 2
+LJTI1_0_0:
+ .long LBB1_3
+
+if the assembler can translate the add to:
+ add r1, pc, #((LJTI1_0_0-(LPCRELL0+4))&0xfffffffc)
+
+Note the assembler also does something similar for the constpool load:
+LPCRELL0:
+ ldr r0, LCPI1_0
+=>
+ ldr r0, pc, #((LCPI1_0-(LPCRELL0+4))&0xfffffffc)
+
+
+//===---------------------------------------------------------------------===//
+
+We compile the following:
+
+define i16 @func_entry_2E_ce(i32 %i) {
+ switch i32 %i, label %bb12.exitStub [
+ i32 0, label %bb4.exitStub
+ i32 1, label %bb9.exitStub
+ i32 2, label %bb4.exitStub
+ i32 3, label %bb4.exitStub
+ i32 7, label %bb9.exitStub
+ i32 8, label %bb.exitStub
+ i32 9, label %bb9.exitStub
+ ]
+
+bb12.exitStub:
+ ret i16 0
+
+bb4.exitStub:
+ ret i16 1
+
+bb9.exitStub:
+ ret i16 2
+
+bb.exitStub:
+ ret i16 3
+}
+
+into:
+
+_func_entry_2E_ce:
+ mov r2, #1
+ lsl r2, r0
+ cmp r0, #9
+ bhi LBB1_4 @bb12.exitStub
+LBB1_1: @newFuncRoot
+ mov r1, #13
+ tst r2, r1
+ bne LBB1_5 @bb4.exitStub
+LBB1_2: @newFuncRoot
+ ldr r1, LCPI1_0
+ tst r2, r1
+ bne LBB1_6 @bb9.exitStub
+LBB1_3: @newFuncRoot
+ mov r1, #1
+ lsl r1, r1, #8
+ tst r2, r1
+ bne LBB1_7 @bb.exitStub
+LBB1_4: @bb12.exitStub
+ mov r0, #0
+ bx lr
+LBB1_5: @bb4.exitStub
+ mov r0, #1
+ bx lr
+LBB1_6: @bb9.exitStub
+ mov r0, #2
+ bx lr
+LBB1_7: @bb.exitStub
+ mov r0, #3
+ bx lr
+LBB1_8:
+ .align 2
+LCPI1_0:
+ .long 642
+
+
+gcc compiles to:
+
+ cmp r0, #9
+ @ lr needed for prologue
+ bhi L2
+ ldr r3, L11
+ mov r2, #1
+ mov r1, r2, asl r0
+ ands r0, r3, r2, asl r0
+ movne r0, #2
+ bxne lr
+ tst r1, #13
+ beq L9
+L3:
+ mov r0, r2
+ bx lr
+L9:
+ tst r1, #256
+ movne r0, #3
+ bxne lr
+L2:
+ mov r0, #0
+ bx lr
+L12:
+ .align 2
+L11:
+ .long 642
+
+
+GCC is doing a couple of clever things here:
+ 1. It is predicating one of the returns. This isn't a clear win though: in
+ cases where that return isn't taken, it is replacing one condbranch with
+ two 'ne' predicated instructions.
+ 2. It is sinking the shift of "1 << i" into the tst, and using ands instead of
+ tst. This will probably require whole function isel.
+ 3. GCC emits:
+ tst r1, #256
+ we emit:
+ mov r1, #1
+ lsl r1, r1, #8
+ tst r2, r1
+
+
+//===---------------------------------------------------------------------===//
+
+When spilling in thumb mode and the sp offset is too large to fit in the ldr /
+str offset field, we load the offset from a constpool entry and add it to sp:
+
+ldr r2, LCPI
+add r2, sp
+ldr r2, [r2]
+
+These instructions preserve the condition code, which is important if the spill
+is between a cmp and a bcc instruction. However, we can use the (potentially)
+cheaper sequence below if we know it's ok to clobber the condition register.
+
+add r2, sp, #255 * 4
+add r2, #132
+ldr r2, [r2, #7 * 4]
+
+This is especially bad when dynamic alloca is used: all fixed-size stack
+objects are then referenced off the frame pointer with negative offsets. See
+oggenc for an example.
+
+//===---------------------------------------------------------------------===//
+
+We are reserving R3 as a scratch register under thumb mode. So if it is live
+into the function, we save / restore R3 to / from R12. Until register
+scavenging is done, we should save R3 to a high callee-saved reg at
+emitPrologue time (when hasFP is true or the stack size is large) and restore
+R3 from that register instead. This allows us to at least get rid of the save
+to r12 every time it is used.
+
+//===---------------------------------------------------------------------===//
+
+Poor codegen test/CodeGen/ARM/select.ll f7:
+
+ ldr r5, LCPI1_0
+LPC0:
+ add r5, pc
+ ldr r6, LCPI1_1
+ ldr r2, LCPI1_2
+ cpy r3, r6
+ cpy lr, pc
+ bx r5
+
+//===---------------------------------------------------------------------===//
+
+Make the register allocator / spiller smarter so we can re-materialize
+"mov r, imm", etc. Almost all Thumb instructions clobber the condition code.
+
+//===---------------------------------------------------------------------===//
+
+Add ldmia, stmia support.
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
new file mode 100644
index 000000000000..068c441ed737
--- /dev/null
+++ b/lib/Target/ARM/README.txt
@@ -0,0 +1,554 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the ARM backend.
+//===---------------------------------------------------------------------===//
+
+Reimplement 'select' in terms of 'SEL'.
+
+* We would really like to support UXTAB16, but we need to prove that the
+ add doesn't need to overflow between the two 16-bit chunks.
+
+* Implement pre/post increment support. (e.g. PR935)
+* Coalesce stack slots!
+* Implement smarter constant generation for binops with large immediates.
+
+* Consider materializing FP constants like 0.0f and 1.0f using integer
+ immediate instructions then copy to FPU. Slower than load into FPU?
+
+//===---------------------------------------------------------------------===//
+
+Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the
+time regalloc happens, these values are now in a 32-bit register, usually with
+the top-bits known to be sign or zero extended. If spilled, we should be able
+to spill these to a 8-bit or 16-bit stack slot, zero or sign extending as part
+of the reload.
+
+Doing this reduces the size of the stack frame (important for thumb etc), and
+also increases the likelihood that we will be able to reload multiple values
+from the stack with a single load.
+
+//===---------------------------------------------------------------------===//
+
+The constant island pass is in good shape. Some cleanups might be desirable,
+but there is unlikely to be much improvement in the generated code.
+
+1. There may be some advantage to trying to be smarter about the initial
+placement, rather than putting everything at the end.
+
+2. There might be some compile-time efficiency to be had by representing
+consecutive islands as a single block rather than multiple blocks.
+
+3. Use a priority queue to sort constant pool users in inverse order of
+ position so we always process the one closest to the end of the function
+ first. This may simplify CreateNewWater.
+
+//===---------------------------------------------------------------------===//
+
+Eliminate copysign custom expansion. We are still generating crappy code with
+default expansion + if-conversion.
+
+//===---------------------------------------------------------------------===//
+
+Eliminate one instruction from:
+
+define i32 @_Z6slow4bii(i32 %x, i32 %y) {
+ %tmp = icmp sgt i32 %x, %y
+ %retval = select i1 %tmp, i32 %x, i32 %y
+ ret i32 %retval
+}
+
+__Z6slow4bii:
+ cmp r0, r1
+ movgt r1, r0
+ mov r0, r1
+ bx lr
+=>
+
+__Z6slow4bii:
+ cmp r0, r1
+ movle r0, r1
+ bx lr
+
+//===---------------------------------------------------------------------===//
+
+Implement long long "X-3" with instructions that fold the immediate in. These
+were disabled due to badness with the ARM carry flag on subtracts.
+
+//===---------------------------------------------------------------------===//
+
+We currently compile abs:
+int foo(int p) { return p < 0 ? -p : p; }
+
+into:
+
+_foo:
+ rsb r1, r0, #0
+ cmn r0, #1
+ movgt r1, r0
+ mov r0, r1
+ bx lr
+
+This is very, uh, literal. This could be a 3 operation sequence:
+ t = (p sra 31);
+ res = (p xor t)-t
+
+Which would be better. This occurs in png decode.
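+
+For reference, a minimal C sketch of that 3-operation sequence (assuming the
+target performs an arithmetic shift right for >> on signed ints, as ARM's asr
+does):
+
+int iabs(int p) {
+  int t = p >> 31;      /* 0 for non-negative p, -1 for negative p */
+  return (p ^ t) - t;   /* xor flips the bits when negative; -t adds 1 back */
+}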
+
+//===---------------------------------------------------------------------===//
+
+More load / store optimizations:
+1) Look past instructions without side-effects (not load, store, branch, etc.)
+ when forming the list of loads / stores to optimize.
+
+2) Smarter register allocation?
+We are probably missing some opportunities to use ldm / stm. Consider:
+
+ldr r5, [r0]
+ldr r4, [r0, #4]
+
+This cannot be merged into a ldm. Perhaps we will need to do the transformation
+before register allocation. Then teach the register allocator to allocate a
+chunk of consecutive registers.
+
+3) Better representation for block transfer? This is from Olden/power:
+
+ fldd d0, [r4]
+ fstd d0, [r4, #+32]
+ fldd d0, [r4, #+8]
+ fstd d0, [r4, #+40]
+ fldd d0, [r4, #+16]
+ fstd d0, [r4, #+48]
+ fldd d0, [r4, #+24]
+ fstd d0, [r4, #+56]
+
+If we can spare the registers, it would be better to use fldm and fstm here.
+Need major register allocator enhancement though.
+
+4) Can we recognize the relative position of constantpool entries? i.e. Treat
+
+ ldr r0, LCPI17_3
+ ldr r1, LCPI17_4
+ ldr r2, LCPI17_5
+
+ as
+ ldr r0, LCPI17
+ ldr r1, LCPI17+4
+ ldr r2, LCPI17+8
+
+ Then the ldr's can be combined into a single ldm. See Olden/power.
+
+Note for ARM v4 gcc uses ldmia to load a pair of 32-bit values to represent a
+double 64-bit FP constant:
+
+ adr r0, L6
+ ldmia r0, {r0-r1}
+
+ .align 2
+L6:
+ .long -858993459
+ .long 1074318540
+
+5) Can we make use of ldrd and strd? Instead of generating ldm / stm, use
+ldrd/strd if there are only two destination registers that form an
+odd/even pair. However, we would probably pay a penalty if the address is not
+aligned on an 8-byte boundary. This requires more information on load / store
+nodes (and MI's?) than we currently carry.
+
+6) struct copies appear to be done field by field
+instead of by words, at least sometimes:
+
+struct foo { int x; short s; char c1; char c2; };
+void cpy(struct foo*a, struct foo*b) { *a = *b; }
+
+llvm code (-O2)
+ ldrb r3, [r1, #+6]
+ ldr r2, [r1]
+ ldrb r12, [r1, #+7]
+ ldrh r1, [r1, #+4]
+ str r2, [r0]
+ strh r1, [r0, #+4]
+ strb r3, [r0, #+6]
+ strb r12, [r0, #+7]
+gcc code (-O2)
+ ldmia r1, {r1-r2}
+ stmia r0, {r1-r2}
+
+In this benchmark poor handling of aggregate copies has shown up as
+having a large effect on size, and possibly speed as well (we don't have
+a good way to measure on ARM).
+
+//===---------------------------------------------------------------------===//
+
+* Consider this silly example:
+
+double bar(double x) {
+ double r = foo(3.1);
+ return x+r;
+}
+
+_bar:
+ stmfd sp!, {r4, r5, r7, lr}
+ add r7, sp, #8
+ mov r4, r0
+ mov r5, r1
+ fldd d0, LCPI1_0
+ fmrrd r0, r1, d0
+ bl _foo
+ fmdrr d0, r4, r5
+ fmsr s2, r0
+ fsitod d1, s2
+ faddd d0, d1, d0
+ fmrrd r0, r1, d0
+ ldmfd sp!, {r4, r5, r7, pc}
+
+Ignore the prologue and epilogue stuff for a second. Note
+ mov r4, r0
+ mov r5, r1
+the copies to callee-saved registers and the fact that they are only used by
+the fmdrr instruction. It would have been better had the fmdrr been scheduled
+before the call, placing its result in a callee-saved DPR register. The two
+mov ops would not have been necessary.
+
+//===---------------------------------------------------------------------===//
+
+Calling convention related stuff:
+
+* gcc's parameter passing implementation is terrible and we suffer as a result:
+
+e.g.
+struct s {
+ double d1;
+ int s1;
+};
+
+void foo(struct s S) {
+ printf("%g, %d\n", S.d1, S.s1);
+}
+
+'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and
+then reloads them to r1, r2, and r3 before issuing the call (r0 contains the
+address of the format string):
+
+ stmfd sp!, {r7, lr}
+ add r7, sp, #0
+ sub sp, sp, #12
+ stmia sp, {r0, r1, r2}
+ ldmia sp, {r1-r2}
+ ldr r0, L5
+ ldr r3, [sp, #8]
+L2:
+ add r0, pc, r0
+ bl L_printf$stub
+
+Instead of a stmia, ldmia, and a ldr, wouldn't it be better to do three moves?
+
+* Returning an aggregate type is even worse:
+
+e.g.
+struct s foo(void) {
+ struct s S = {1.1, 2};
+ return S;
+}
+
+ mov ip, r0
+ ldr r0, L5
+ sub sp, sp, #12
+L2:
+ add r0, pc, r0
+ @ lr needed for prologue
+ ldmia r0, {r0, r1, r2}
+ stmia sp, {r0, r1, r2}
+ stmia ip, {r0, r1, r2}
+ mov r0, ip
+ add sp, sp, #12
+ bx lr
+
+r0 (and later ip) is the hidden parameter from the caller indicating where to
+store the return value. The first ldmia loads the constants into r0, r1, r2.
+The last stmia stores r0, r1, r2 into the address passed in. However, there is
+one additional stmia that stores r0, r1, and r2 to some stack location. That
+store is dead.
+
+The llvm-gcc generated code looks like this:
+
+csretcc void %foo(%struct.s* %agg.result) {
+entry:
+ %S = alloca %struct.s, align 4 ; <%struct.s*> [#uses=1]
+ %memtmp = alloca %struct.s ; <%struct.s*> [#uses=1]
+ cast %struct.s* %S to sbyte* ; <sbyte*>:0 [#uses=2]
+ call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 )
+ cast %struct.s* %agg.result to sbyte* ; <sbyte*>:1 [#uses=2]
+ call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 )
+ cast %struct.s* %memtmp to sbyte* ; <sbyte*>:2 [#uses=1]
+ call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 )
+ ret void
+}
+
+llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from
+constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated
+into a number of loads and stores, or 2) custom lower memcpy (of small size) to
+be ldmia / stmia. I think option 2 is better but the current register
+allocator cannot allocate a chunk of registers at a time.
+
+A feasible temporary solution is to use specific physical registers at
+lowering time for small (<= 4 words?) transfer sizes.
+
+* ARM CSRet calling convention requires the hidden argument to be returned by
+the callee.
+
+//===---------------------------------------------------------------------===//
+
+We can definitely do a better job on BB placements to eliminate some branches.
+It's very common to see llvm generated assembly code that looks like this:
+
+LBB3:
+ ...
+LBB4:
+...
+ beq LBB3
+ b LBB2
+
+If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can
+then eliminate the beq and turn the unconditional branch to LBB2 into a bne.
+
+See McCat/18-imp/ComputeBoundingBoxes for an example.
+
+//===---------------------------------------------------------------------===//
+
+Register scavenging is now implemented. The example in the previous version
+of this document produces optimal code at -O2.
+
+//===---------------------------------------------------------------------===//
+
+Pre-/post- indexed load / stores:
+
+1) We should not make the pre/post- indexed load/store transform if the base ptr
+is guaranteed to be live beyond the load/store. This can happen if the base
+ptr is live out of the block in which we are performing the optimization, e.g.
+
+mov r1, r2
+ldr r3, [r1], #4
+...
+
+vs.
+
+ldr r3, [r2]
+add r1, r2, #4
+...
+
+In most cases, this is just a wasted optimization. However, sometimes it can
+negatively impact the performance because two-address code is more restrictive
+when it comes to scheduling.
+
+Unfortunately, liveout information is currently unavailable during DAG combine
+time.
+
+2) Consider splitting an indexed load / store into a pair of add/sub +
+ load/store to solve #1 (in TwoAddressInstructionPass.cpp).
+
+3) Enhance LSR to generate more opportunities for indexed ops.
+
+4) Once we add support for multiple result patterns, write indexed load
+ patterns instead of C++ instruction selection code.
+
+5) Use FLDM / FSTM to emulate indexed FP load / store.
+
+//===---------------------------------------------------------------------===//
+
+We should add i64 support to take advantage of the 64-bit load / stores.
+We can add a pseudo i64 register class containing pseudo registers that are
+register pairs. All other ops (e.g. add, sub) would be expanded as usual.
+
+We need to add pseudo instructions (i.e. gethi / getlo) to extract i32 registers
+from the i64 register. These are single moves which can be eliminated if the
+destination register is a sub-register of the source. We should implement proper
+subreg support in the register allocator to coalesce these away.
+
+There are other minor issues such as multiple instructions for a spill / restore
+/ move.
+
+//===---------------------------------------------------------------------===//
+
+Implement support for some more tricky ways to materialize immediates. For
+example, to get 0xffff8000, we can use:
+
+mov r9, #&3f8000
+sub r9, r9, #&400000
+
+(0x3f8000 - 0x400000 = -0x8000, which is 0xffff8000 as a 32-bit value.)
+
+//===---------------------------------------------------------------------===//
+
+We sometimes generate multiple add / sub instructions to update sp in prologue
+and epilogue if the inc / dec value is too large to fit in a single immediate
+operand. In some cases, perhaps it might be better to load the value from a
+constantpool instead.
+
+//===---------------------------------------------------------------------===//
+
+GCC generates significantly better code for this function.
+
+int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) {
+ int i = 0;
+
+ if (StackPtr != 0) {
+ while (StackPtr != 0 && i < (((LineLen) < (32768))? (LineLen) : (32768)))
+ Line[i++] = Stack[--StackPtr];
+ if (LineLen > 32768)
+ {
+ while (StackPtr != 0 && i < LineLen)
+ {
+ i++;
+ --StackPtr;
+ }
+ }
+ }
+ return StackPtr;
+}
+
+//===---------------------------------------------------------------------===//
+
+This should compile to the mlas instruction:
+int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; }
+
+//===---------------------------------------------------------------------===//
+
+At some point, we should triage these to see if they still apply to us:
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663
+
+http://www.inf.u-szeged.hu/gcc-arm/
+http://citeseer.ist.psu.edu/debus04linktime.html
+
+//===---------------------------------------------------------------------===//
+
+gcc generates smaller code for this function at -O2 or -Os:
+
+void foo(signed char* p) {
+ if (*p == 3)
+ bar();
+ else if (*p == 4)
+ baz();
+ else if (*p == 5)
+ quux();
+}
+
+llvm decides it's a good idea to turn the repeated if...else into a
+binary tree, as if it were a switch; the resulting code requires one fewer
+compare-and-branch when *p<=2 or *p==5, the same number if *p==4
+or *p>6, and one more if *p==3. So it should be a speed win
+(on balance). However, the revised code is larger, with 4 conditional
+branches instead of 3.
+
+More seriously, there is a byte->word extend before
+each comparison, where there should be only one, and the condition codes
+are not remembered when the same two values are compared twice.
+
+//===---------------------------------------------------------------------===//
+
+More register scavenging work:
+
+1. Use the register scavenger to track frame index materialized into registers
+ (those that do not fit in addressing modes) to allow reuse in the same BB.
+2. Finish scavenging for Thumb.
+3. We know some spills and restores are unnecessary. The issue is once live
+ intervals are merged, they are never split. So every def is spilled
+ and every use requires a restore if the register allocator decides the
+ resulting live interval is not assigned a physical register. It may be
+ possible (with the help of the scavenger) to turn some spill / restore
+ pairs into register copies.
+
+//===---------------------------------------------------------------------===//
+
+More LSR enhancements possible:
+
+1. Teach LSR about pre- and post- indexed ops to allow the iv increment to be
+ merged into a load / store.
+2. Allow iv reuse even when a type conversion is required. For example, i8
+ and i32 load / store addressing modes are identical.
+
+
+//===---------------------------------------------------------------------===//
+
+This:
+
+int foo(int a, int b, int c, int d) {
+ long long acc = (long long)a * (long long)b;
+ acc += (long long)c * (long long)d;
+ return (int)(acc >> 32);
+}
+
+Should compile to use SMLAL (Signed Multiply Accumulate Long) which multiplies
+two signed 32-bit values to produce a 64-bit value, and accumulates this with
+a 64-bit value.
+
+We currently get this with both v4 and v6:
+
+_foo:
+ smull r1, r0, r1, r0
+ smull r3, r2, r3, r2
+ adds r3, r3, r1
+ adc r0, r2, r0
+ bx lr
+
+//===---------------------------------------------------------------------===//
+
+This:
+ #include <algorithm>
+ std::pair<unsigned, bool> full_add(unsigned a, unsigned b)
+ { return std::make_pair(a + b, a + b < a); }
+ bool no_overflow(unsigned a, unsigned b)
+ { return !full_add(a, b).second; }
+
+Should compile to:
+
+_Z8full_addjj:
+ adds r2, r1, r2
+ movcc r1, #0
+ movcs r1, #1
+ str r2, [r0, #0]
+ strb r1, [r0, #4]
+ mov pc, lr
+
+_Z11no_overflowjj:
+ cmn r0, r1
+ movcs r0, #0
+ movcc r0, #1
+ mov pc, lr
+
+not:
+
+__Z8full_addjj:
+ add r3, r2, r1
+ str r3, [r0]
+ mov r2, #1
+ mov r12, #0
+ cmp r3, r1
+ movlo r12, r2
+ str r12, [r0, #+4]
+ bx lr
+__Z11no_overflowjj:
+ add r3, r1, r0
+ mov r2, #1
+ mov r1, #0
+ cmp r3, r0
+ movhs r1, r2
+ mov r0, r1
+ bx lr
+
+//===---------------------------------------------------------------------===//
+
diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h
new file mode 100644
index 000000000000..281517614935
--- /dev/null
+++ b/lib/Target/Alpha/Alpha.h
@@ -0,0 +1,51 @@
+//===-- Alpha.h - Top-level interface for Alpha representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Alpha back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ALPHA_H
+#define TARGET_ALPHA_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+ class AlphaTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class raw_ostream;
+
+ FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM);
+ FunctionPass *createAlphaCodePrinterPass(raw_ostream &OS,
+ TargetMachine &TM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+ FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM);
+ FunctionPass *createAlphaCodeEmitterPass(AlphaTargetMachine &TM,
+ MachineCodeEmitter &MCE);
+ FunctionPass *createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
+ JITCodeEmitter &JCE);
+ FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm);
+ FunctionPass *createAlphaBranchSelectionPass();
+
+} // end namespace llvm;
+
+// Defines symbolic names for Alpha registers. This defines a mapping from
+// register name to register number.
+//
+#include "AlphaGenRegisterNames.inc"
+
+// Defines symbolic names for the Alpha instructions.
+//
+#include "AlphaGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/Alpha/Alpha.td b/lib/Target/Alpha/Alpha.td
new file mode 100644
index 000000000000..e3748c6a09f3
--- /dev/null
+++ b/lib/Target/Alpha/Alpha.td
@@ -0,0 +1,66 @@
+//===- Alpha.td - Describe the Alpha Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+//Alpha is little endian
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
+ "Enable CIX extentions">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Schedule Description
+//===----------------------------------------------------------------------===//
+
+include "AlphaSchedule.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrInfo.td"
+
+def AlphaInstrInfo : InstrInfo {
+ // Define how we want to layout our target-specific information field.
+ // let TSFlagsFields = [];
+ // let TSFlagsShifts = [];
+}
+
+//===----------------------------------------------------------------------===//
+// Alpha Processor Definitions
+//===----------------------------------------------------------------------===//
+
+def : Processor<"generic", Alpha21264Itineraries, []>;
+def : Processor<"ev6" , Alpha21264Itineraries, []>;
+def : Processor<"ev67" , Alpha21264Itineraries, [FeatureCIX]>;
+
+//===----------------------------------------------------------------------===//
+// The Alpha Target
+//===----------------------------------------------------------------------===//
+
+
+def Alpha : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AlphaInstrInfo;
+}
diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp
new file mode 100644
index 000000000000..aca8ca734897
--- /dev/null
+++ b/lib/Target/Alpha/AlphaBranchSelector.cpp
@@ -0,0 +1,67 @@
+//===-- AlphaBranchSelector.cpp - Convert Pseudo branches ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Replace pseudo COND_BRANCH_* instructions with their appropriate real
+// branches. This is a simplified version of the PPC branch selector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+using namespace llvm;
+
+namespace {
+ struct VISIBILITY_HIDDEN AlphaBSel : public MachineFunctionPass {
+ static char ID;
+ AlphaBSel() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Alpha Branch Selection";
+ }
+ };
+ char AlphaBSel::ID = 0;
+}
+
+/// createAlphaBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createAlphaBranchSelectionPass() {
+ return new AlphaBSel();
+}
+
+bool AlphaBSel::runOnMachineFunction(MachineFunction &Fn) {
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI) {
+ if (MBBI->getOpcode() == Alpha::COND_BRANCH_I ||
+ MBBI->getOpcode() == Alpha::COND_BRANCH_F) {
+
+ // condbranch operands:
+ // 0. bc opcode
+ // 1. reg
+ // 2. target MBB
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ MBBI->setDesc(TII->get(MBBI->getOperand(0).getImm()));
+ }
+ }
+ }
+
+ return true;
+}
+
diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp
new file mode 100644
index 000000000000..f50f007c2076
--- /dev/null
+++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp
@@ -0,0 +1,242 @@
+//===-- Alpha/AlphaCodeEmitter.cpp - Convert Alpha code to machine code ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the Alpha machine instructions
+// into relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-emitter"
+#include "AlphaTargetMachine.h"
+#include "AlphaRelocations.h"
+#include "Alpha.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+
+ class AlphaCodeEmitter {
+ MachineCodeEmitter &MCE;
+ public:
+ AlphaCodeEmitter(MachineCodeEmitter &mce) : MCE(mce) {}
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+
+ unsigned getBinaryCodeForInstr(const MachineInstr &MI);
+
+ /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
+
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO);
+ };
+
+ template <class CodeEmitter>
+ class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass,
+ public AlphaCodeEmitter
+ {
+ const AlphaInstrInfo *II;
+ TargetMachine &TM;
+ CodeEmitter &MCE;
+
+ public:
+ static char ID;
+ explicit Emitter(TargetMachine &tm, CodeEmitter &mce)
+ : MachineFunctionPass(&ID), AlphaCodeEmitter(mce),
+ II(0), TM(tm), MCE(mce) {}
+ Emitter(TargetMachine &tm, CodeEmitter &mce, const AlphaInstrInfo& ii)
+ : MachineFunctionPass(&ID), AlphaCodeEmitter(mce),
+ II(&ii), TM(tm), MCE(mce) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Alpha Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI);
+
+ private:
+ void emitBasicBlock(MachineBasicBlock &MBB);
+ };
+
+ template <class CodeEmitter>
+ char Emitter<CodeEmitter>::ID = 0;
+}
+
+/// createAlphaCodeEmitterPass - Return a pass that emits the collected Alpha
+/// code to the specified MCE object.
+
+FunctionPass *llvm::createAlphaCodeEmitterPass(AlphaTargetMachine &TM,
+ MachineCodeEmitter &MCE) {
+ return new Emitter<MachineCodeEmitter>(TM, MCE);
+}
+
+FunctionPass *llvm::createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new Emitter<JITCodeEmitter>(TM, JCE);
+}
+
+template <class CodeEmitter>
+bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
+ II = ((AlphaTargetMachine&)MF.getTarget()).getInstrInfo();
+
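+  // finishFunction() is expected to return true when the emitter runs out of
+  // buffer space, in which case the whole function is re-emitted from the
+  // start; hence the retry loop below.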
+ do {
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ emitBasicBlock(*I);
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+template <class CodeEmitter>
+void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) {
+ MCE.StartMachineBasicBlock(&MBB);
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ const MachineInstr &MI = *I;
+ switch(MI.getOpcode()) {
+ default:
+ MCE.emitWordLE(getBinaryCodeForInstr(*I));
+ break;
+ case Alpha::ALTENT:
+ case Alpha::PCLABEL:
+ case Alpha::MEMLABEL:
+ case TargetInstrInfo::IMPLICIT_DEF:
+ break; //skip these
+ }
+ }
+}
+
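+// getAlphaRegNumber - Map an Alpha register enum value onto its 5-bit hardware
+// encoding. Integer and FP registers share the 0-31 encoding space; register
+// 31 in either bank ($31 / $f31) always reads as zero on Alpha.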
+static unsigned getAlphaRegNumber(unsigned Reg) {
+ switch (Reg) {
+ case Alpha::R0 : case Alpha::F0 : return 0;
+ case Alpha::R1 : case Alpha::F1 : return 1;
+ case Alpha::R2 : case Alpha::F2 : return 2;
+ case Alpha::R3 : case Alpha::F3 : return 3;
+ case Alpha::R4 : case Alpha::F4 : return 4;
+ case Alpha::R5 : case Alpha::F5 : return 5;
+ case Alpha::R6 : case Alpha::F6 : return 6;
+ case Alpha::R7 : case Alpha::F7 : return 7;
+ case Alpha::R8 : case Alpha::F8 : return 8;
+ case Alpha::R9 : case Alpha::F9 : return 9;
+ case Alpha::R10 : case Alpha::F10 : return 10;
+ case Alpha::R11 : case Alpha::F11 : return 11;
+ case Alpha::R12 : case Alpha::F12 : return 12;
+ case Alpha::R13 : case Alpha::F13 : return 13;
+ case Alpha::R14 : case Alpha::F14 : return 14;
+ case Alpha::R15 : case Alpha::F15 : return 15;
+ case Alpha::R16 : case Alpha::F16 : return 16;
+ case Alpha::R17 : case Alpha::F17 : return 17;
+ case Alpha::R18 : case Alpha::F18 : return 18;
+ case Alpha::R19 : case Alpha::F19 : return 19;
+ case Alpha::R20 : case Alpha::F20 : return 20;
+ case Alpha::R21 : case Alpha::F21 : return 21;
+ case Alpha::R22 : case Alpha::F22 : return 22;
+ case Alpha::R23 : case Alpha::F23 : return 23;
+ case Alpha::R24 : case Alpha::F24 : return 24;
+ case Alpha::R25 : case Alpha::F25 : return 25;
+ case Alpha::R26 : case Alpha::F26 : return 26;
+ case Alpha::R27 : case Alpha::F27 : return 27;
+ case Alpha::R28 : case Alpha::F28 : return 28;
+ case Alpha::R29 : case Alpha::F29 : return 29;
+ case Alpha::R30 : case Alpha::F30 : return 30;
+ case Alpha::R31 : case Alpha::F31 : return 31;
+ default:
+ assert(0 && "Unhandled reg");
+ abort();
+ }
+}
+
+unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) {
+
+ unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
+ // or things that get fixed up later by the JIT.
+
+ if (MO.isReg()) {
+ rv = getAlphaRegNumber(MO.getReg());
+ } else if (MO.isImm()) {
+ rv = MO.getImm();
+ } else if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) {
+ DOUT << MO << " is a relocated op for " << MI << "\n";
+ unsigned Reloc = 0;
+ int Offset = 0;
+ bool useGOT = false;
+ switch (MI.getOpcode()) {
+ case Alpha::BSR:
+ Reloc = Alpha::reloc_bsr;
+ break;
+ case Alpha::LDLr:
+ case Alpha::LDQr:
+ case Alpha::LDBUr:
+ case Alpha::LDWUr:
+ case Alpha::LDSr:
+ case Alpha::LDTr:
+ case Alpha::LDAr:
+ case Alpha::STQr:
+ case Alpha::STLr:
+ case Alpha::STWr:
+ case Alpha::STBr:
+ case Alpha::STSr:
+ case Alpha::STTr:
+ Reloc = Alpha::reloc_gprellow;
+ break;
+ case Alpha::LDAHr:
+ Reloc = Alpha::reloc_gprelhigh;
+ break;
+ case Alpha::LDQl:
+ Reloc = Alpha::reloc_literal;
+ useGOT = true;
+ break;
+ case Alpha::LDAg:
+ case Alpha::LDAHg:
+ Reloc = Alpha::reloc_gpdist;
+ Offset = MI.getOperand(3).getImm();
+ break;
+ default:
+ assert(0 && "unknown relocatable instruction");
+ abort();
+ }
+ if (MO.isGlobal())
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
+ Reloc, MO.getGlobal(), Offset,
+ isa<Function>(MO.getGlobal()),
+ useGOT));
+ else if (MO.isSymbol())
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, MO.getSymbolName(),
+ Offset, true));
+ else
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, MO.getIndex(), Offset));
+ } else if (MO.isMBB()) {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Alpha::reloc_bsr, MO.getMBB()));
+  } else {
+ cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
+ abort();
+ }
+
+ return rv;
+}
+
+#include "AlphaGenCodeEmitter.inc"
+
+
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
new file mode 100644
index 000000000000..affcd3e7fec8
--- /dev/null
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -0,0 +1,553 @@
+//===-- AlphaISelDAGToDAG.cpp - Alpha pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for Alpha,
+// converting from a legalized dag to an Alpha dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaTargetMachine.h"
+#include "AlphaISelLowering.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+
+ //===--------------------------------------------------------------------===//
+ /// AlphaDAGToDAGISel - Alpha specific code to select Alpha machine
+ /// instructions for SelectionDAG operations.
+ class AlphaDAGToDAGISel : public SelectionDAGISel {
+ static const int64_t IMM_LOW = -32768;
+ static const int64_t IMM_HIGH = 32767;
+ static const int64_t IMM_MULT = 65536;
+ static const int64_t IMM_FULLHIGH = IMM_HIGH + IMM_HIGH * IMM_MULT;
+ static const int64_t IMM_FULLLOW = IMM_LOW + IMM_LOW * IMM_MULT;
+
+ static int64_t get_ldah16(int64_t x) {
+ int64_t y = x / IMM_MULT;
+ if (x % IMM_MULT > IMM_HIGH)
+ ++y;
+ return y;
+ }
+
+ static int64_t get_lda16(int64_t x) {
+ return x - get_ldah16(x) * IMM_MULT;
+ }
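+
+    // Illustrative example: materializing 0x1234F678 uses get_ldah16 = 0x1235
+    // and get_lda16 = -0x988, since LDA sign-extends its 16-bit displacement;
+    // (0x1235 << 16) + (-0x988) == 0x1234F678.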
+
+ /// get_zapImm - Return a zap mask if X is a valid immediate for a zapnot
+ /// instruction (if not, return 0). Note that this code accepts partial
+    /// zap masks. For example (and LHS, 1) is a valid zap, as long as we know
+ /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are
+ /// in checking mode. If LHS is null, we assume that the mask has already
+ /// been validated before.
+ uint64_t get_zapImm(SDValue LHS, uint64_t Constant) {
+ uint64_t BitsToCheck = 0;
+ unsigned Result = 0;
+ for (unsigned i = 0; i != 8; ++i) {
+ if (((Constant >> 8*i) & 0xFF) == 0) {
+ // nothing to do.
+ } else {
+ Result |= 1 << i;
+ if (((Constant >> 8*i) & 0xFF) == 0xFF) {
+ // If the entire byte is set, zapnot the byte.
+ } else if (LHS.getNode() == 0) {
+            // Otherwise, if the mask was previously validated, we know it's okay
+            // to zapnot this entire byte even though all the bits aren't set.
+          } else {
+            // Otherwise we don't know that it's okay to zapnot this entire
+            // byte. Only do this if we can prove that the missing bits are
+            // already null, so the bytezap doesn't need to really null them.
+            BitsToCheck |= ~Constant & (0xFFULL << 8*i);
+ }
+ }
+ }
+
+      // If there are missing bits in a byte (for example, X & 0xEF00), check to
+      // see if the missing bits (0x1000) are already known to be zero. If not,
+      // the zap isn't okay to do, as it won't clear all the required bits.
+ if (BitsToCheck &&
+ !CurDAG->MaskedValueIsZero(LHS,
+ APInt(LHS.getValueSizeInBits(),
+ BitsToCheck)))
+ return 0;
+
+ return Result;
+ }
+
+ static uint64_t get_zapImm(uint64_t x) {
+ unsigned build = 0;
+ for(int i = 0; i != 8; ++i) {
+ if ((x & 0x00FF) == 0x00FF)
+ build |= 1 << i;
+ else if ((x & 0x00FF) != 0)
+ return 0;
+ x >>= 8;
+ }
+ return build;
+ }
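+
+    // Illustrative example: get_zapImm(0x00000000FFFF0000ULL) == 0x0C (keep
+    // bytes 2 and 3), while 0x00000000FF7F0000ULL returns 0 because byte 2 is
+    // only partially set.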
+
+
+    static uint64_t getNearPower2(uint64_t x) {
+      if (!x) return 0;
+      unsigned at = CountLeadingZeros_64(x);
+      uint64_t complow = 1ULL << (63 - at);
+      uint64_t comphigh = complow << 1;
+      //cerr << x << ":" << complow << ":" << comphigh << "\n";
+      if (x - complow <= comphigh - x)
+        return complow;
+      else
+        return comphigh;
+    }
+
+ static bool chkRemNearPower2(uint64_t x, uint64_t r, bool swap) {
+ uint64_t y = getNearPower2(x);
+ if (swap)
+ return (y - x) == r;
+ else
+ return (x - y) == r;
+ }
+
+ static bool isFPZ(SDValue N) {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ return (CN && (CN->getValueAPF().isZero()));
+ }
+ static bool isFPZn(SDValue N) {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ return (CN && CN->getValueAPF().isNegZero());
+ }
+ static bool isFPZp(SDValue N) {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ return (CN && CN->getValueAPF().isPosZero());
+ }
+
+ public:
+ explicit AlphaDAGToDAGISel(AlphaTargetMachine &TM)
+ : SelectionDAGISel(TM)
+ {}
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(int64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDValue Op);
+
+ /// InstructionSelect - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "Alpha DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ Op0 = Op;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ return false;
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "AlphaGenDAGISel.inc"
+
+private:
+ SDValue getGlobalBaseReg();
+ SDValue getGlobalRetAddr();
+ void SelectCALL(SDValue Op);
+
+ };
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+///
+SDValue AlphaDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GP = 0;
+ for(MachineRegisterInfo::livein_iterator ii = RegInfo->livein_begin(),
+ ee = RegInfo->livein_end(); ii != ee; ++ii)
+ if (ii->first == Alpha::R29) {
+ GP = ii->second;
+ break;
+ }
+ assert(GP && "GOT PTR not in liveins");
+ // FIXME is there anywhere sensible to get a DebugLoc here?
+ return CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ DebugLoc::getUnknownLoc(), GP, MVT::i64);
+}
+
+/// getGlobalRetAddr - Grab the return address.
+///
+SDValue AlphaDAGToDAGISel::getGlobalRetAddr() {
+ unsigned RA = 0;
+ for(MachineRegisterInfo::livein_iterator ii = RegInfo->livein_begin(),
+ ee = RegInfo->livein_end(); ii != ee; ++ii)
+ if (ii->first == Alpha::R26) {
+ RA = ii->second;
+ break;
+ }
+ assert(RA && "RA PTR not in liveins");
+ // FIXME is there anywhere sensible to get a DebugLoc here?
+ return CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ DebugLoc::getUnknownLoc(), RA, MVT::i64);
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void AlphaDAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *AlphaDAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ if (N->isMachineOpcode()) {
+ return NULL; // Already selected.
+ }
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (N->getOpcode()) {
+ default: break;
+ case AlphaISD::CALL:
+ SelectCALL(Op);
+ return NULL;
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ return CurDAG->SelectNodeTo(N, Alpha::LDA, MVT::i64,
+ CurDAG->getTargetFrameIndex(FI, MVT::i32),
+ getI64Imm(0));
+ }
+ case ISD::GLOBAL_OFFSET_TABLE: {
+ SDValue Result = getGlobalBaseReg();
+ ReplaceUses(Op, Result);
+ return NULL;
+ }
+ case AlphaISD::GlobalRetAddr: {
+ SDValue Result = getGlobalRetAddr();
+ ReplaceUses(Op, Result);
+ return NULL;
+ }
+
+ case AlphaISD::DivCall: {
+ SDValue Chain = CurDAG->getEntryNode();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R24, N1,
+ SDValue(0,0));
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R25, N2,
+ Chain.getValue(1));
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0,
+ Chain.getValue(1));
+ SDNode *CNode =
+ CurDAG->getTargetNode(Alpha::JSRs, dl, MVT::Other, MVT::Flag,
+ Chain, Chain.getValue(1));
+ Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64,
+ SDValue(CNode, 1));
+ return CurDAG->SelectNodeTo(N, Alpha::BISr, MVT::i64, Chain, Chain);
+ }
+
+ case ISD::READCYCLECOUNTER: {
+ SDValue Chain = N->getOperand(0);
+ return CurDAG->getTargetNode(Alpha::RPCC, dl, MVT::i64, MVT::Other,
+ Chain);
+ }
+
+ case ISD::Constant: {
+ uint64_t uval = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (uval == 0) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ Alpha::R31, MVT::i64);
+ ReplaceUses(Op, Result);
+ return NULL;
+ }
+
+ int64_t val = (int64_t)uval;
+ int32_t val32 = (int32_t)val;
+ if (val <= IMM_HIGH + IMM_HIGH * IMM_MULT &&
+ val >= IMM_LOW + IMM_LOW * IMM_MULT)
+ break; //(LDAH (LDA))
+ if ((uval >> 32) == 0 && //empty upper bits
+ val32 <= IMM_HIGH + IMM_HIGH * IMM_MULT)
+ // val32 >= IMM_LOW + IMM_LOW * IMM_MULT) //always true
+ break; //(zext (LDAH (LDA)))
+ //Else use the constant pool
+ ConstantInt *C = ConstantInt::get(Type::Int64Ty, uval);
+ SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64);
+ SDNode *Tmp = CurDAG->getTargetNode(Alpha::LDAHr, dl, MVT::i64, CPI,
+ getGlobalBaseReg());
+ return CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other,
+ CPI, SDValue(Tmp, 0), CurDAG->getEntryNode());
+ }
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+ bool isDouble = N->getValueType(0) == MVT::f64;
+ MVT T = isDouble ? MVT::f64 : MVT::f32;
+ if (CN->getValueAPF().isPosZero()) {
+ return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYST : Alpha::CPYSS,
+ T, CurDAG->getRegister(Alpha::F31, T),
+ CurDAG->getRegister(Alpha::F31, T));
+ } else if (CN->getValueAPF().isNegZero()) {
+ return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYSNT : Alpha::CPYSNS,
+ T, CurDAG->getRegister(Alpha::F31, T),
+ CurDAG->getRegister(Alpha::F31, T));
+ } else {
+ abort();
+ }
+ break;
+ }
+
+ case ISD::SETCC:
+ if (N->getOperand(0).getNode()->getValueType(0).isFloatingPoint()) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ unsigned Opc = Alpha::WTF;
+ bool rev = false;
+ bool inv = false;
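+      // Alpha only provides CMPTEQ/CMPTLT/CMPTLE/CMPTUN, so the "greater"
+      // predicates are handled by swapping the operands (rev) and the
+      // "not equal"/"ordered" ones by comparing the CMPT result against zero
+      // (inv); the unordered predicates additionally fold in a CMPTUN of the
+      // operands (via ADDT) below.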
+ switch(CC) {
+ default: DEBUG(N->dump(CurDAG)); assert(0 && "Unknown FP comparison!");
+ case ISD::SETEQ: case ISD::SETOEQ: case ISD::SETUEQ:
+ Opc = Alpha::CMPTEQ; break;
+ case ISD::SETLT: case ISD::SETOLT: case ISD::SETULT:
+ Opc = Alpha::CMPTLT; break;
+ case ISD::SETLE: case ISD::SETOLE: case ISD::SETULE:
+ Opc = Alpha::CMPTLE; break;
+ case ISD::SETGT: case ISD::SETOGT: case ISD::SETUGT:
+ Opc = Alpha::CMPTLT; rev = true; break;
+ case ISD::SETGE: case ISD::SETOGE: case ISD::SETUGE:
+ Opc = Alpha::CMPTLE; rev = true; break;
+ case ISD::SETNE: case ISD::SETONE: case ISD::SETUNE:
+ Opc = Alpha::CMPTEQ; inv = true; break;
+ case ISD::SETO:
+ Opc = Alpha::CMPTUN; inv = true; break;
+ case ISD::SETUO:
+ Opc = Alpha::CMPTUN; break;
+ };
+ SDValue tmp1 = N->getOperand(rev?1:0);
+ SDValue tmp2 = N->getOperand(rev?0:1);
+ SDNode *cmp = CurDAG->getTargetNode(Opc, dl, MVT::f64, tmp1, tmp2);
+ if (inv)
+ cmp = CurDAG->getTargetNode(Alpha::CMPTEQ, dl,
+ MVT::f64, SDValue(cmp, 0),
+ CurDAG->getRegister(Alpha::F31, MVT::f64));
+ switch(CC) {
+ case ISD::SETUEQ: case ISD::SETULT: case ISD::SETULE:
+ case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE:
+ {
+ SDNode* cmp2 = CurDAG->getTargetNode(Alpha::CMPTUN, dl, MVT::f64,
+ tmp1, tmp2);
+ cmp = CurDAG->getTargetNode(Alpha::ADDT, dl, MVT::f64,
+ SDValue(cmp2, 0), SDValue(cmp, 0));
+ break;
+ }
+ default: break;
+ }
+
+ SDNode* LD = CurDAG->getTargetNode(Alpha::FTOIT, dl,
+ MVT::i64, SDValue(cmp, 0));
+ return CurDAG->getTargetNode(Alpha::CMPULT, dl, MVT::i64,
+ CurDAG->getRegister(Alpha::R31, MVT::i64),
+ SDValue(LD,0));
+ }
+ break;
+
+ case ISD::SELECT:
+ if (N->getValueType(0).isFloatingPoint() &&
+ (N->getOperand(0).getOpcode() != ISD::SETCC ||
+ !N->getOperand(0).getOperand(1).getValueType().isFloatingPoint())) {
+ //This should be the condition not covered by the Patterns
+ //FIXME: Don't have SelectCode die, but rather return something testable
+      //       so that things like this can be caught in fall-through code
+ //move int to fp
+ bool isDouble = N->getValueType(0) == MVT::f64;
+ SDValue cond = N->getOperand(0);
+ SDValue TV = N->getOperand(1);
+ SDValue FV = N->getOperand(2);
+
+ SDNode* LD = CurDAG->getTargetNode(Alpha::ITOFT, dl, MVT::f64, cond);
+ return CurDAG->getTargetNode(isDouble?Alpha::FCMOVNET:Alpha::FCMOVNES,
+ dl, MVT::f64, FV, TV, SDValue(LD,0));
+ }
+ break;
+
+ case ISD::AND: {
+ ConstantSDNode* SC = NULL;
+ ConstantSDNode* MC = NULL;
+ if (N->getOperand(0).getOpcode() == ISD::SRL &&
+ (MC = dyn_cast<ConstantSDNode>(N->getOperand(1))) &&
+ (SC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)))) {
+ uint64_t sval = SC->getZExtValue();
+ uint64_t mval = MC->getZExtValue();
+ // If the result is a zap, let the autogened stuff handle it.
+ if (get_zapImm(N->getOperand(0), mval))
+ break;
+      // Given mask X and shift S, we want to see if there is any zap in the
+      // mask if we play around with the bottom S bits.
+ uint64_t dontcare = (~0ULL) >> (64 - sval);
+ uint64_t mask = mval << sval;
+
+ if (get_zapImm(mask | dontcare))
+ mask = mask | dontcare;
+
+ if (get_zapImm(mask)) {
+ SDValue Z =
+ SDValue(CurDAG->getTargetNode(Alpha::ZAPNOTi, dl, MVT::i64,
+ N->getOperand(0).getOperand(0),
+ getI64Imm(get_zapImm(mask))), 0);
+ return CurDAG->getTargetNode(Alpha::SRLr, dl, MVT::i64, Z,
+ getI64Imm(sval));
+ }
+ }
+ break;
+ }
+
+ }
+
+ return SelectCode(Op);
+}
+
+void AlphaDAGToDAGISel::SelectCALL(SDValue Op) {
+  //TODO: add flag stuff to prevent nondeterministic breakage!
+
+ SDNode *N = Op.getNode();
+ SDValue Chain = N->getOperand(0);
+ SDValue Addr = N->getOperand(1);
+ SDValue InFlag(0,0); // Null incoming flag value.
+ DebugLoc dl = N->getDebugLoc();
+
+ std::vector<SDValue> CallOperands;
+ std::vector<MVT> TypeOperands;
+
+ //grab the arguments
+ for(int i = 2, e = N->getNumOperands(); i < e; ++i) {
+ TypeOperands.push_back(N->getOperand(i).getValueType());
+ CallOperands.push_back(N->getOperand(i));
+ }
+ int count = N->getNumOperands() - 2;
+
+ static const unsigned args_int[] = {Alpha::R16, Alpha::R17, Alpha::R18,
+ Alpha::R19, Alpha::R20, Alpha::R21};
+ static const unsigned args_float[] = {Alpha::F16, Alpha::F17, Alpha::F18,
+ Alpha::F19, Alpha::F20, Alpha::F21};
+
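+  // Arguments beyond the first six are stored to the outgoing argument area at
+  // (i - 6) * 8 bytes off the stack pointer (R30); the first six are copied
+  // into the integer/FP argument registers listed above.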
+ for (int i = 6; i < count; ++i) {
+ unsigned Opc = Alpha::WTF;
+ if (TypeOperands[i].isInteger()) {
+ Opc = Alpha::STQ;
+ } else if (TypeOperands[i] == MVT::f32) {
+ Opc = Alpha::STS;
+ } else if (TypeOperands[i] == MVT::f64) {
+ Opc = Alpha::STT;
+ } else
+ assert(0 && "Unknown operand");
+
+ SDValue Ops[] = { CallOperands[i], getI64Imm((i - 6) * 8),
+ CurDAG->getCopyFromReg(Chain, dl, Alpha::R30, MVT::i64),
+ Chain };
+ Chain = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 4), 0);
+ }
+ for (int i = 0; i < std::min(6, count); ++i) {
+ if (TypeOperands[i].isInteger()) {
+ Chain = CurDAG->getCopyToReg(Chain, dl, args_int[i],
+ CallOperands[i], InFlag);
+ InFlag = Chain.getValue(1);
+ } else if (TypeOperands[i] == MVT::f32 || TypeOperands[i] == MVT::f64) {
+ Chain = CurDAG->getCopyToReg(Chain, dl, args_float[i],
+ CallOperands[i], InFlag);
+ InFlag = Chain.getValue(1);
+ } else
+ assert(0 && "Unknown operand");
+ }
+
+ // Finally, once everything is in registers to pass to the call, emit the
+ // call itself.
+ if (Addr.getOpcode() == AlphaISD::GPRelLo) {
+ SDValue GOT = getGlobalBaseReg();
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = SDValue(CurDAG->getTargetNode(Alpha::BSR, dl, MVT::Other,
+ MVT::Flag, Addr.getOperand(0),
+ Chain, InFlag), 0);
+ } else {
+ Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = SDValue(CurDAG->getTargetNode(Alpha::JSR, dl, MVT::Other,
+ MVT::Flag, Chain, InFlag), 0);
+ }
+ InFlag = Chain.getValue(1);
+
+ std::vector<SDValue> CallResults;
+
+ switch (N->getValueType(0).getSimpleVT()) {
+ default: assert(0 && "Unexpected ret value!");
+ case MVT::Other: break;
+ case MVT::i64:
+ Chain = CurDAG->getCopyFromReg(Chain, dl,
+ Alpha::R0, MVT::i64, InFlag).getValue(1);
+ CallResults.push_back(Chain.getValue(0));
+ break;
+ case MVT::f32:
+ Chain = CurDAG->getCopyFromReg(Chain, dl,
+ Alpha::F0, MVT::f32, InFlag).getValue(1);
+ CallResults.push_back(Chain.getValue(0));
+ break;
+ case MVT::f64:
+ Chain = CurDAG->getCopyFromReg(Chain, dl,
+ Alpha::F0, MVT::f64, InFlag).getValue(1);
+ CallResults.push_back(Chain.getValue(0));
+ break;
+ }
+
+ CallResults.push_back(Chain);
+ for (unsigned i = 0, e = CallResults.size(); i != e; ++i)
+ ReplaceUses(Op.getValue(i), CallResults[i]);
+}
+
+
+/// createAlphaISelDag - This pass converts a legalized DAG into a
+/// Alpha-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createAlphaISelDag(AlphaTargetMachine &TM) {
+ return new AlphaDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
new file mode 100644
index 000000000000..10011125cf85
--- /dev/null
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -0,0 +1,798 @@
+//===-- AlphaISelLowering.cpp - Alpha DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AlphaISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaISelLowering.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+/// AddLiveIn - This helper function adds the specified physical register to the
+/// MachineFunction as a live in value. It also creates a corresponding virtual
+/// register for it.
+static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
+ TargetRegisterClass *RC) {
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) {
+ // Set up the TargetLowering object.
+ //I am having problems with shr n ubyte 1
+ setShiftAmountType(MVT::i64);
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ setUsesGlobalOffsetTable(true);
+
+ addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
+ addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass);
+ addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+
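+  // The count instructions (CTPOP/CTTZ/CTLZ) are only available with the CIX
+  // extension (FeatureCIX / hasCT); without it they must be expanded.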
+ if (!TM.getSubtarget<AlphaSubtarget>().hasCT()) {
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ }
+ setOperationAction(ISD::BSWAP , MVT::i64, Expand);
+ setOperationAction(ISD::ROTL , MVT::i64, Expand);
+ setOperationAction(ISD::ROTR , MVT::i64, Expand);
+
+ setOperationAction(ISD::SREM , MVT::i64, Custom);
+ setOperationAction(ISD::UREM , MVT::i64, Custom);
+ setOperationAction(ISD::SDIV , MVT::i64, Custom);
+ setOperationAction(ISD::UDIV , MVT::i64, Custom);
+
+ setOperationAction(ISD::ADDC , MVT::i64, Expand);
+ setOperationAction(ISD::ADDE , MVT::i64, Expand);
+ setOperationAction(ISD::SUBC , MVT::i64, Expand);
+ setOperationAction(ISD::SUBE , MVT::i64, Expand);
+
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+
+
+ // We don't support sin/cos/sqrt/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::f32, Promote);
+
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Promote);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Not implemented yet.
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+ // We want to legalize GlobalAddress and ConstantPool and
+ // ExternalSymbols nodes into the appropriate instructions to
+ // materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i32, Custom);
+
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+
+ setStackPointerRegisterToSaveRestore(Alpha::R30);
+
+ addLegalFPImmediate(APFloat(+0.0)); //F31
+ addLegalFPImmediate(APFloat(+0.0f)); //F31
+ addLegalFPImmediate(APFloat(-0.0)); //-F31
+ addLegalFPImmediate(APFloat(-0.0f)); //-F31
+
+ setJumpBufSize(272);
+ setJumpBufAlignment(16);
+
+ computeRegisterProperties();
+}
+
+MVT AlphaTargetLowering::getSetCCResultType(MVT VT) const {
+ return MVT::i64;
+}
+
+const char *AlphaTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case AlphaISD::CVTQT_: return "Alpha::CVTQT_";
+ case AlphaISD::CVTQS_: return "Alpha::CVTQS_";
+ case AlphaISD::CVTTQ_: return "Alpha::CVTTQ_";
+ case AlphaISD::GPRelHi: return "Alpha::GPRelHi";
+ case AlphaISD::GPRelLo: return "Alpha::GPRelLo";
+ case AlphaISD::RelLit: return "Alpha::RelLit";
+ case AlphaISD::GlobalRetAddr: return "Alpha::GlobalRetAddr";
+ case AlphaISD::CALL: return "Alpha::CALL";
+ case AlphaISD::DivCall: return "Alpha::DivCall";
+ case AlphaISD::RET_FLAG: return "Alpha::RET_FLAG";
+ case AlphaISD::COND_BRANCH_I: return "Alpha::COND_BRANCH_I";
+ case AlphaISD::COND_BRANCH_F: return "Alpha::COND_BRANCH_F";
+ }
+}
+
+static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+ MVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, JTI,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, JTI, Hi);
+ return Lo;
+}
+
+//http://www.cs.arizona.edu/computer.help/policy/DIGITAL_unix/
+//AA-PY8AC-TET1_html/callCH3.html#BLOCK21
+
+//For now, just use variable size stack frame format
+
+//In a standard call, the first six items are passed in registers $16
+//- $21 and/or registers $f16 - $f21. (See Section 4.1.2 for details
+//of argument-to-register correspondence.) The remaining items are
+//collected in a memory argument list that is a naturally aligned
+//array of quadwords. In a standard call, this list, if present, must
+//be passed at 0(SP).
+//7 ... n 0(SP) ... (n-7)*8(SP)
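+//
+//So, for example, a call with eight arguments would pass the first six in
+//$16 - $21 (and/or $f16 - $f21) and the remaining two in the memory argument
+//list at 0(SP) and 8(SP). (Illustrative restatement of the convention above.)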
+
+// //#define FP $15
+// //#define RA $26
+// //#define PV $27
+// //#define GP $29
+// //#define SP $30
+
+static SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG,
+ int &VarArgsBase,
+ int &VarArgsOffset) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ std::vector<SDValue> ArgValues;
+ SDValue Root = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ AddLiveIn(MF, Alpha::R29, &Alpha::GPRCRegClass); //GP
+ AddLiveIn(MF, Alpha::R26, &Alpha::GPRCRegClass); //RA
+
+ unsigned args_int[] = {
+ Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21};
+ unsigned args_float[] = {
+ Alpha::F16, Alpha::F17, Alpha::F18, Alpha::F19, Alpha::F20, Alpha::F21};
+
+ for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e; ++ArgNo) {
+ SDValue argt;
+ MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ SDValue ArgVal;
+
+ if (ArgNo < 6) {
+ switch (ObjectVT.getSimpleVT()) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::f64:
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ &Alpha::F8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Root, dl, args_float[ArgNo], ObjectVT);
+ break;
+ case MVT::f32:
+ args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
+ &Alpha::F4RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Root, dl, args_float[ArgNo], ObjectVT);
+ break;
+ case MVT::i64:
+ args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo],
+ &Alpha::GPRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Root, dl, args_int[ArgNo], MVT::i64);
+ break;
+ }
+ } else { //more args
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6));
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+ }
+ ArgValues.push_back(ArgVal);
+ }
+
+  // If the function takes a variable number of arguments, copy all argument
+  // registers to the stack.
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ if (isVarArg) {
+ VarArgsOffset = (Op.getNode()->getNumValues()-1) * 8;
+ std::vector<SDValue> LS;
+ for (int i = 0; i < 6; ++i) {
+ if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
+ args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
+ SDValue argt = DAG.getCopyFromReg(Root, dl, args_int[i], MVT::i64);
+ int FI = MFI->CreateFixedObject(8, -8 * (6 - i));
+ if (i == 0) VarArgsBase = FI;
+ SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
+ LS.push_back(DAG.getStore(Root, dl, argt, SDFI, NULL, 0));
+
+ if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
+ args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
+ argt = DAG.getCopyFromReg(Root, dl, args_float[i], MVT::f64);
+ FI = MFI->CreateFixedObject(8, - 8 * (12 - i));
+ SDFI = DAG.getFrameIndex(FI, MVT::i64);
+ LS.push_back(DAG.getStore(Root, dl, argt, SDFI, NULL, 0));
+ }
+
+ //Set up a token factor with all the stack traffic
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LS[0], LS.size());
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size());
+}
+
+static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Copy = DAG.getCopyToReg(Op.getOperand(0), dl, Alpha::R26,
+ DAG.getNode(AlphaISD::GlobalRetAddr,
+ DebugLoc::getUnknownLoc(),
+ MVT::i64),
+ SDValue());
+ switch (Op.getNumOperands()) {
+ default:
+ assert(0 && "Do not know how to return this many arguments!");
+ abort();
+ case 1:
+ break;
+ //return SDValue(); // ret void is legal
+ case 3: {
+ MVT ArgVT = Op.getOperand(1).getValueType();
+ unsigned ArgReg;
+ if (ArgVT.isInteger())
+ ArgReg = Alpha::R0;
+ else {
+ assert(ArgVT.isFloatingPoint());
+ ArgReg = Alpha::F0;
+ }
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg,
+ Op.getOperand(1), Copy.getValue(1));
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg);
+ break;
+ }
+ case 5: {
+ MVT ArgVT = Op.getOperand(1).getValueType();
+ unsigned ArgReg1, ArgReg2;
+ if (ArgVT.isInteger()) {
+ ArgReg1 = Alpha::R0;
+ ArgReg2 = Alpha::R1;
+ } else {
+ assert(ArgVT.isFloatingPoint());
+ ArgReg1 = Alpha::F0;
+ ArgReg2 = Alpha::F1;
+ }
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg1,
+ Op.getOperand(1), Copy.getValue(1));
+ if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
+ DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1)
+ == DAG.getMachineFunction().getRegInfo().liveout_end())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1);
+ Copy = DAG.getCopyToReg(Copy, dl, ArgReg2,
+ Op.getOperand(3), Copy.getValue(1));
+ if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
+ DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2)
+ == DAG.getMachineFunction().getRegInfo().liveout_end())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg2);
+ break;
+ }
+ }
+ return DAG.getNode(AlphaISD::RET_FLAG, dl,
+ MVT::Other, Copy, Copy.getValue(1));
+}
+
+std::pair<SDValue, SDValue>
+AlphaTargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+ bool RetSExt, bool RetZExt, bool isVarArg,
+ bool isInreg, unsigned CallingConv,
+ bool isTailCall, SDValue Callee,
+ ArgListTy &Args, SelectionDAG &DAG,
+ DebugLoc dl) {
+ int NumBytes = 0;
+ if (Args.size() > 6)
+ NumBytes = (Args.size() - 6) * 8;
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ std::vector<SDValue> args_to_use;
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
+ {
+ switch (getValueType(Args[i].Ty).getSimpleVT()) {
+ default: assert(0 && "Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // Promote the integer to 64 bits. If the input type is signed use a
+ // sign extend, otherwise use a zero extend.
+ if (Args[i].isSExt)
+ Args[i].Node = DAG.getNode(ISD::SIGN_EXTEND, dl,
+ MVT::i64, Args[i].Node);
+ else if (Args[i].isZExt)
+ Args[i].Node = DAG.getNode(ISD::ZERO_EXTEND, dl,
+ MVT::i64, Args[i].Node);
+ else
+ Args[i].Node = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Args[i].Node);
+ break;
+ case MVT::i64:
+ case MVT::f64:
+ case MVT::f32:
+ break;
+ }
+ args_to_use.push_back(Args[i].Node);
+ }
+
+ std::vector<MVT> RetVals;
+ MVT RetTyVT = getValueType(RetTy);
+ MVT ActualRetTyVT = RetTyVT;
+ if (RetTyVT.getSimpleVT() >= MVT::i1 && RetTyVT.getSimpleVT() <= MVT::i32)
+ ActualRetTyVT = MVT::i64;
+
+ if (RetTyVT != MVT::isVoid)
+ RetVals.push_back(ActualRetTyVT);
+ RetVals.push_back(MVT::Other);
+
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
+ SDValue TheCall = DAG.getNode(AlphaISD::CALL, dl,
+ RetVals, &Ops[0], Ops.size());
+ Chain = TheCall.getValue(RetTyVT != MVT::isVoid);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+ SDValue RetVal = TheCall;
+
+ if (RetTyVT != ActualRetTyVT) {
+ ISD::NodeType AssertKind = ISD::DELETED_NODE;
+ if (RetSExt)
+ AssertKind = ISD::AssertSext;
+ else if (RetZExt)
+ AssertKind = ISD::AssertZext;
+
+ if (AssertKind != ISD::DELETED_NODE)
+ RetVal = DAG.getNode(AssertKind, dl, MVT::i64, RetVal,
+ DAG.getValueType(RetTyVT));
+
+ RetVal = DAG.getNode(ISD::TRUNCATE, dl, RetTyVT, RetVal);
+ }
+
+ return std::make_pair(RetVal, Chain);
+}
+
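+// LowerVAARG - Emit the address computation and bookkeeping for reading the
+// next va_arg value. The va_list layout assumed by this code (as inferred from
+// the loads and stores below) is a pair { i8* Base, i32 Offset }: Base points
+// at the saved-argument area, Offset advances by 8 per argument read, and FP
+// arguments are read from 6*8 bytes below the integer area while Offset < 6*8.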
+void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
+ SDValue &DataPtr, SelectionDAG &DAG) {
+ Chain = N->getOperand(0);
+ SDValue VAListP = N->getOperand(1);
+ const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0);
+ SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
+ DAG.getConstant(8, MVT::i64));
+ SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1),
+ Tmp, NULL, 0, MVT::i32);
+ DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset);
+ if (N->getValueType(0).isFloatingPoint())
+ {
+ //if fp && Offset < 6*8, then subtract 6*8 from DataPtr
+ SDValue FPDataPtr = DAG.getNode(ISD::SUB, dl, MVT::i64, DataPtr,
+ DAG.getConstant(8*6, MVT::i64));
+ SDValue CC = DAG.getSetCC(dl, MVT::i64, Offset,
+ DAG.getConstant(8*6, MVT::i64), ISD::SETLT);
+ DataPtr = DAG.getNode(ISD::SELECT, dl, MVT::i64, CC, FPDataPtr, DataPtr);
+ }
+
+ SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset,
+ DAG.getConstant(8, MVT::i64));
+ Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, NULL, 0,
+ MVT::i32);
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Wasn't expecting to be able to lower this!");
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG,
+ VarArgsBase,
+ VarArgsOffset);
+
+ case ISD::RET: return LowerRET(Op,DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: break; // Don't custom lower most intrinsics.
+ case Intrinsic::alpha_umulh:
+ return DAG.getNode(ISD::MULHU, dl, MVT::i64,
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ }
+
+ case ISD::SINT_TO_FP: {
+ assert(Op.getOperand(0).getValueType() == MVT::i64 &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+ SDValue LD;
+ bool isDouble = Op.getValueType() == MVT::f64;
+ LD = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op.getOperand(0));
+ SDValue FP = DAG.getNode(isDouble?AlphaISD::CVTQT_:AlphaISD::CVTQS_, dl,
+ isDouble?MVT::f64:MVT::f32, LD);
+ return FP;
+ }
+ case ISD::FP_TO_SINT: {
+ bool isDouble = Op.getOperand(0).getValueType() == MVT::f64;
+ SDValue src = Op.getOperand(0);
+
+ if (!isDouble) //Promote
+ src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src);
+
+ src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src);
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, src);
+ }
+ case ISD::ConstantPool: {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ Constant *C = CP->getConstVal();
+ SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment());
+ // FIXME there isn't really any debug info here
+
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, CPI,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, CPI, Hi);
+ return Lo;
+ }
+ case ISD::GlobalTLSAddress:
+ assert(0 && "TLS not implemented for Alpha.");
+ case ISD::GlobalAddress: {
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset());
+ // FIXME there isn't really any debug info here
+
+ // if (!GV->hasWeakLinkage() && !GV->isDeclaration() && !GV->hasLinkOnceLinkage()) {
+ if (GV->hasLocalLinkage()) {
+ SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, GA, Hi);
+ return Lo;
+ } else
+ return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, GA,
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ }
+ case ISD::ExternalSymbol: {
+ return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64,
+ DAG.getTargetExternalSymbol(cast<ExternalSymbolSDNode>(Op)
+ ->getSymbol(), MVT::i64),
+ DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+ }
+
+ case ISD::UREM:
+ case ISD::SREM:
+ //Expand only on constant case
+ if (Op.getOperand(1).getOpcode() == ISD::Constant) {
+ MVT VT = Op.getNode()->getValueType(0);
+ SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM ?
+ BuildUDIV(Op.getNode(), DAG, NULL) :
+ BuildSDIV(Op.getNode(), DAG, NULL);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Op.getOperand(1));
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Op.getOperand(0), Tmp1);
+ return Tmp1;
+ }
+ //fall through
+ case ISD::SDIV:
+ case ISD::UDIV:
+ if (Op.getValueType().isInteger()) {
+ if (Op.getOperand(1).getOpcode() == ISD::Constant)
+ return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.getNode(), DAG, NULL)
+ : BuildUDIV(Op.getNode(), DAG, NULL);
+ const char* opstr = 0;
+ switch (Op.getOpcode()) {
+ case ISD::UREM: opstr = "__remqu"; break;
+ case ISD::SREM: opstr = "__remq"; break;
+ case ISD::UDIV: opstr = "__divqu"; break;
+ case ISD::SDIV: opstr = "__divq"; break;
+ }
+ SDValue Tmp1 = Op.getOperand(0),
+ Tmp2 = Op.getOperand(1),
+ Addr = DAG.getExternalSymbol(opstr, MVT::i64);
+ return DAG.getNode(AlphaISD::DivCall, dl, MVT::i64, Addr, Tmp1, Tmp2);
+ }
+ break;
+
+ case ISD::VAARG: {
+ SDValue Chain, DataPtr;
+ LowerVAARG(Op.getNode(), Chain, DataPtr, DAG);
+
+ SDValue Result;
+ if (Op.getValueType() == MVT::i32)
+ Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr,
+ NULL, 0, MVT::i32);
+ else
+ Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0);
+ return Result;
+ }
+ case ISD::VACOPY: {
+ SDValue Chain = Op.getOperand(0);
+ SDValue DestP = Op.getOperand(1);
+ SDValue SrcP = Op.getOperand(2);
+ const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0);
+ SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0);
+ SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
+ DAG.getConstant(8, MVT::i64));
+ Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
+ NP, NULL,0, MVT::i32);
+ SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
+ DAG.getConstant(8, MVT::i64));
+ return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32);
+ }
+ case ISD::VASTART: {
+ SDValue Chain = Op.getOperand(0);
+ SDValue VAListP = Op.getOperand(1);
+ const Value *VAListS = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // vastart stores the address of the VarArgsBase and VarArgsOffset
+ SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64);
+ SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0);
+ SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
+ DAG.getConstant(8, MVT::i64));
+ return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64),
+ SA2, NULL, 0, MVT::i32);
+ }
+ case ISD::RETURNADDR:
+ return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc::getUnknownLoc(),
+ MVT::i64);
+ //FIXME: implement
+ case ISD::FRAMEADDR: break;
+ }
+
+ return SDValue();
+}
+
+void AlphaTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
+ DebugLoc dl = N->getDebugLoc();
+ assert(N->getValueType(0) == MVT::i32 &&
+ N->getOpcode() == ISD::VAARG &&
+ "Unknown node to custom promote!");
+
+ SDValue Chain, DataPtr;
+ LowerVAARG(N, Chain, DataPtr, DAG);
+ SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0);
+ Results.push_back(Res);
+ Results.push_back(SDValue(Res.getNode(), 1));
+}
+
+
+//Inline Asm
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+AlphaTargetLowering::ConstraintType
+AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'f':
+ case 'r':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::vector<unsigned> AlphaTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+    default: break;     // Unknown constraint letter
+ case 'f':
+ return make_vector<unsigned>(Alpha::F0 , Alpha::F1 , Alpha::F2 ,
+ Alpha::F3 , Alpha::F4 , Alpha::F5 ,
+ Alpha::F6 , Alpha::F7 , Alpha::F8 ,
+ Alpha::F9 , Alpha::F10, Alpha::F11,
+ Alpha::F12, Alpha::F13, Alpha::F14,
+ Alpha::F15, Alpha::F16, Alpha::F17,
+ Alpha::F18, Alpha::F19, Alpha::F20,
+ Alpha::F21, Alpha::F22, Alpha::F23,
+ Alpha::F24, Alpha::F25, Alpha::F26,
+ Alpha::F27, Alpha::F28, Alpha::F29,
+ Alpha::F30, Alpha::F31, 0);
+ case 'r':
+ return make_vector<unsigned>(Alpha::R0 , Alpha::R1 , Alpha::R2 ,
+ Alpha::R3 , Alpha::R4 , Alpha::R5 ,
+ Alpha::R6 , Alpha::R7 , Alpha::R8 ,
+ Alpha::R9 , Alpha::R10, Alpha::R11,
+ Alpha::R12, Alpha::R13, Alpha::R14,
+ Alpha::R15, Alpha::R16, Alpha::R17,
+ Alpha::R18, Alpha::R19, Alpha::R20,
+ Alpha::R21, Alpha::R22, Alpha::R23,
+ Alpha::R24, Alpha::R25, Alpha::R26,
+ Alpha::R27, Alpha::R28, Alpha::R29,
+ Alpha::R30, Alpha::R31, 0);
+ }
+ }
+
+ return std::vector<unsigned>();
+}
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ assert((MI->getOpcode() == Alpha::CAS32 ||
+ MI->getOpcode() == Alpha::CAS64 ||
+ MI->getOpcode() == Alpha::LAS32 ||
+ MI->getOpcode() == Alpha::LAS64 ||
+ MI->getOpcode() == Alpha::SWAP32 ||
+ MI->getOpcode() == Alpha::SWAP64) &&
+ "Unexpected instr type to insert");
+
+ bool is32 = MI->getOpcode() == Alpha::CAS32 ||
+ MI->getOpcode() == Alpha::LAS32 ||
+ MI->getOpcode() == Alpha::SWAP32;
+
+  //Load-locked / store-conditional sequences for the atomic ops all take the
+  //same form:
+  //start:
+  //ll
+  //do stuff (maybe branch to exit)
+  //sc
+  //test sc and maybe branch to start
+  //exit:
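+  //
+  //For example (sketch; the actual register assignment is done below), an
+  //atomic add (LAS64) is expected to expand to roughly:
+  //  llsc:  ldq_l  res, 0(ptr)
+  //         addq   res, val, tmp
+  //         stq_c  tmp, 0(ptr)
+  //         beq    tmp, llsc
+  //  sink: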
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ sinkMBB->transferSuccessors(thisMBB);
+
+ F->insert(It, llscMBB);
+ F->insert(It, sinkMBB);
+
+ BuildMI(thisMBB, dl, TII->get(Alpha::BR)).addMBB(llscMBB);
+
+ unsigned reg_res = MI->getOperand(0).getReg(),
+ reg_ptr = MI->getOperand(1).getReg(),
+ reg_v2 = MI->getOperand(2).getReg(),
+ reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+
+ BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L),
+ reg_res).addImm(0).addReg(reg_ptr);
+ switch (MI->getOpcode()) {
+ case Alpha::CAS32:
+ case Alpha::CAS64: {
+ unsigned reg_cmp
+ = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
+ BuildMI(llscMBB, dl, TII->get(Alpha::CMPEQ), reg_cmp)
+ .addReg(reg_v2).addReg(reg_res);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
+ .addImm(0).addReg(reg_cmp).addMBB(sinkMBB);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
+ .addReg(Alpha::R31).addReg(MI->getOperand(3).getReg());
+ break;
+ }
+ case Alpha::LAS32:
+ case Alpha::LAS64: {
+ BuildMI(llscMBB, dl,TII->get(is32 ? Alpha::ADDLr : Alpha::ADDQr), reg_store)
+ .addReg(reg_res).addReg(reg_v2);
+ break;
+ }
+ case Alpha::SWAP32:
+ case Alpha::SWAP64: {
+ BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
+ .addReg(reg_v2).addReg(reg_v2);
+ break;
+ }
+ }
+ BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::STL_C : Alpha::STQ_C), reg_store)
+ .addReg(reg_store).addImm(0).addReg(reg_ptr);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
+ .addImm(0).addReg(reg_store).addMBB(llscMBB);
+ BuildMI(llscMBB, dl, TII->get(Alpha::BR)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(llscMBB);
+ llscMBB->addSuccessor(llscMBB);
+ llscMBB->addSuccessor(sinkMBB);
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+
+ return sinkMBB;
+}
+
+bool
+AlphaTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Alpha target isn't yet aware of offsets.
+ return false;
+}
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
new file mode 100644
index 000000000000..fdd817c76488
--- /dev/null
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -0,0 +1,114 @@
+//===-- AlphaISelLowering.h - Alpha DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Alpha uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+#define LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
+
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "Alpha.h"
+
+namespace llvm {
+
+ namespace AlphaISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ //These correspond to the identical instructions
+ CVTQT_, CVTQS_, CVTTQ_,
+
+ /// GPRelHi/GPRelLo - These represent the high and low 16-bit
+ /// parts of a global address respectively.
+ GPRelHi, GPRelLo,
+
+ /// RelLit - Literal Relocation of a Global
+ RelLit,
+
+ /// GlobalRetAddr - used to restore the return address
+ GlobalRetAddr,
+
+ /// CALL - Normal call.
+ CALL,
+
+ /// DivCall - used for special library calls for div and rem
+ DivCall,
+
+ /// return flag operand
+ RET_FLAG,
+
+ /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction.
+ /// *PRC is the input register to compare to zero,
+ /// OPC is the branch opcode to use (e.g. Alpha::BEQ),
+ /// DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH_I, COND_BRANCH_F
+
+ };
+ }
+
+ class AlphaTargetLowering : public TargetLowering {
+ int VarArgsOffset; // What is the offset to the first vaarg
+ int VarArgsBase; // What is the base FrameIndex
+ bool useITOF;
+ public:
+ explicit AlphaTargetLowering(TargetMachine &TM);
+
+ /// getSetCCResultType - Get the SETCC result ValueType
+ virtual MVT getSetCCResultType(MVT VT) const;
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG);
+
+ // Friendly names for dumps
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// LowerCallTo - This hook lowers an abstract call to a function into an
+ /// actual call.
+ virtual std::pair<SDValue, SDValue>
+ LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt,
+ bool isVarArg, bool isInreg, unsigned CC, bool isTailCall,
+ SDValue Callee, ArgListTy &Args, SelectionDAG &DAG,
+ DebugLoc dl);
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ bool hasITOF() { return useITOF; }
+
+ MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ private:
+ // Helpers for custom lowering.
+ void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
+ SelectionDAG &DAG);
+
+ };
+}
+
+#endif // LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td
new file mode 100644
index 000000000000..6d82875fad2d
--- /dev/null
+++ b/lib/Target/Alpha/AlphaInstrFormats.td
@@ -0,0 +1,268 @@
+//===- AlphaInstrFormats.td - Alpha Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//Instruction formats (architecture manual section 3.3):
+//Memory
+//Branch
+//Operate
+//Floating-point
+//PALcode
+
+def u8imm : Operand<i64>;
+def s14imm : Operand<i64>;
+def s16imm : Operand<i64>;
+def s21imm : Operand<i64>;
+def s64imm : Operand<i64>;
+def u64imm : Operand<i64>;
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+// Alpha instruction baseline
+class InstAlpha<bits<6> op, string asmstr, InstrItinClass itin> : Instruction {
+ field bits<32> Inst;
+ let Namespace = "Alpha";
+ let AsmString = asmstr;
+ let Inst{31-26} = op;
+ let Itinerary = itin;
+}
+
+
+//3.3.1
+class MForm<bits<6> opcode, bit load, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let canFoldAsLoad = load;
+ let Defs = [R28]; //We may use this for frame index calculations, so reserve it here
+
+ bits<5> Ra;
+ bits<16> disp;
+ bits<5> Rb;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-0} = disp;
+}
+class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ bits<5> Ra;
+
+ let OutOperandList = (ops GPRC:$RA);
+ let InOperandList = (ops);
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = fc;
+}
+class MfcPForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (ops);
+ let InOperandList = (ops);
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = fc;
+}
+
+class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<14> disp;
+
+ let OutOperandList = (ops);
+ let InOperandList = OL;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-14} = TB;
+ let Inst{13-0} = disp;
+}
+class MbrpForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern=pattern;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<14> disp;
+
+ let OutOperandList = (ops);
+ let InOperandList = OL;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-14} = TB;
+ let Inst{13-0} = disp;
+}
+
+//3.3.2
+def target : Operand<OtherVT> {}
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+class BFormN<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (ops);
+ let InOperandList = OL;
+ bits<64> Opc; //dummy
+ bits<5> Ra;
+ bits<21> disp;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-0} = disp;
+}
+}
+
+let isBranch = 1, isTerminator = 1 in
+class BFormD<bits<6> opcode, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (ops);
+ let InOperandList = (ops target:$DISP);
+ bits<5> Ra;
+ bits<21> disp;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-0} = disp;
+}
+
+//3.3.3
+class OForm<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RA, GPRC:$RB);
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm2<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RB);
+
+ bits<5> Rc;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = 31;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm4<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RDEST);
+ let InOperandList = (ins GPRC:$RCOND, GPRC:$RTRUE, GPRC:$RFALSE);
+ let Constraints = "$RFALSE = $RDEST";
+ let DisableEncoding = "$RFALSE";
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<5> Rb;
+ bits<7> Function = fun;
+
+// let isTwoAddress = 1;
+ let Inst{25-21} = Ra;
+ let Inst{20-16} = Rb;
+ let Inst{15-13} = 0;
+ let Inst{12} = 0;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+
+class OFormL<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RC);
+ let InOperandList = (ins GPRC:$RA, u8imm:$L);
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<8> LIT;
+ bits<7> Function = fun;
+
+ let Inst{25-21} = Ra;
+ let Inst{20-13} = LIT;
+ let Inst{12} = 1;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+class OForm4L<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+ let OutOperandList = (outs GPRC:$RDEST);
+ let InOperandList = (ins GPRC:$RCOND, s64imm:$RTRUE, GPRC:$RFALSE);
+ let Constraints = "$RFALSE = $RDEST";
+ let DisableEncoding = "$RFALSE";
+
+ bits<5> Rc;
+ bits<5> Ra;
+ bits<8> LIT;
+ bits<7> Function = fun;
+
+// let isTwoAddress = 1;
+ let Inst{25-21} = Ra;
+ let Inst{20-13} = LIT;
+ let Inst{12} = 1;
+ let Inst{11-5} = Function;
+ let Inst{4-0} = Rc;
+}
+
+//3.3.4
+class FPForm<bits<6> opcode, bits<11> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let Pattern = pattern;
+
+ bits<5> Fc;
+ bits<5> Fa;
+ bits<5> Fb;
+ bits<11> Function = fun;
+
+ let Inst{25-21} = Fa;
+ let Inst{20-16} = Fb;
+ let Inst{15-5} = Function;
+ let Inst{4-0} = Fc;
+}
+
+//3.3.5
+class PALForm<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (ops);
+ let InOperandList = OL;
+ bits<26> Function;
+
+ let Inst{25-0} = Function;
+}
+
+
+// Pseudo instructions.
+class PseudoInstAlpha<dag OOL, dag IOL, string nm, list<dag> pattern, InstrItinClass itin>
+ : InstAlpha<0, nm, itin> {
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let Pattern = pattern;
+
+}
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
new file mode 100644
index 000000000000..a54d97d33c40
--- /dev/null
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -0,0 +1,450 @@
+//===- AlphaInstrInfo.cpp - Alpha Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaGenInstrInfo.inc"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+AlphaInstrInfo::AlphaInstrInfo()
+ : TargetInstrInfoImpl(AlphaInsts, array_lengthof(AlphaInsts)),
+ RI(*this) { }
+
+
+bool AlphaInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg, unsigned& destReg,
+ unsigned& SrcSR, unsigned& DstSR) const {
+ unsigned oc = MI.getOpcode();
+ if (oc == Alpha::BISr ||
+ oc == Alpha::CPYSS ||
+ oc == Alpha::CPYST ||
+ oc == Alpha::CPYSSt ||
+ oc == Alpha::CPYSTs) {
+ // or r1, r2, r2
+ // cpys(s|t) r1 r2 r2
+ assert(MI.getNumOperands() >= 3 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isReg() &&
+ "invalid Alpha BIS instruction!");
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ SrcSR = DstSR = 0;
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned
+AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Alpha::LDL:
+ case Alpha::LDQ:
+ case Alpha::LDBU:
+ case Alpha::LDWU:
+ case Alpha::LDS:
+ case Alpha::LDT:
+ if (MI->getOperand(1).isFI()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned
+AlphaInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Alpha::STL:
+ case Alpha::STQ:
+ case Alpha::STB:
+ case Alpha::STW:
+ case Alpha::STS:
+ case Alpha::STT:
+ if (MI->getOperand(1).isFI()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+static bool isAlphaIntCondCode(unsigned Opcode) {
+ switch (Opcode) {
+ case Alpha::BEQ:
+ case Alpha::BNE:
+ case Alpha::BGE:
+ case Alpha::BGT:
+ case Alpha::BLE:
+ case Alpha::BLT:
+ case Alpha::BLBC:
+ case Alpha::BLBS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME this should probably have a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "Alpha branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(TBB);
+ else // Conditional branch
+ if (isAlphaIntCondCode(Cond[0].getImm()))
+ BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ else
+ BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ if (isAlphaIntCondCode(Cond[0].getImm()))
+ BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ else
+ BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(FBB);
+ return 2;
+}
+
+bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ //cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n";
+ if (DestRC != SrcRC) {
+ // Not yet supported!
+ return false;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (DestRC == Alpha::GPRCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg);
+ } else if (DestRC == Alpha::F4RCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg);
+ } else if (DestRC == Alpha::F8RCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg);
+ } else {
+ // Attempt to copy register that is not GPR or FPR
+ return false;
+ }
+
+ return true;
+}
+
+void
+AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ //cerr << "Trying to store " << getPrettyName(SrcReg) << " to "
+ // << FrameIdx << "\n";
+ //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg);
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == Alpha::F4RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::F8RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STT))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::GPRCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::STQ))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else
+ abort();
+}
+
+void AlphaInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc = 0;
+ if (RC == Alpha::F4RCRegisterClass)
+ Opc = Alpha::STS;
+ else if (RC == Alpha::F8RCRegisterClass)
+ Opc = Alpha::STT;
+ else if (RC == Alpha::GPRCRegisterClass)
+ Opc = Alpha::STQ;
+ else
+ abort();
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB =
+ BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+}
+
+void
+AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ //cerr << "Trying to load " << getPrettyName(DestReg) << " to "
+ // << FrameIdx << "\n";
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == Alpha::F4RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDS), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::F8RCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDT), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else if (RC == Alpha::GPRCRegisterClass)
+ BuildMI(MBB, MI, DL, get(Alpha::LDQ), DestReg)
+ .addFrameIndex(FrameIdx).addReg(Alpha::F31);
+ else
+ abort();
+}
+
+void AlphaInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc = 0;
+ if (RC == Alpha::F4RCRegisterClass)
+ Opc = Alpha::LDS;
+ else if (RC == Alpha::F8RCRegisterClass)
+ Opc = Alpha::LDT;
+ else if (RC == Alpha::GPRCRegisterClass)
+ Opc = Alpha::LDQ;
+ else
+ abort();
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB =
+ BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+}
+
+MachineInstr *AlphaInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ if (Ops.size() != 1) return NULL;
+
+ // Make sure this is a reg-reg copy.
+ unsigned Opc = MI->getOpcode();
+
+ MachineInstr *NewMI = NULL;
+ switch(Opc) {
+ default:
+ break;
+ case Alpha::BISr:
+ case Alpha::CPYSS:
+ case Alpha::CPYST:
+ if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ if (Ops[0] == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ Opc = (Opc == Alpha::BISr) ? Alpha::STQ :
+ ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT);
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(InReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIndex)
+ .addReg(Alpha::F31);
+ } else { // load -> move
+ unsigned OutReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ Opc = (Opc == Alpha::BISr) ? Alpha::LDQ :
+ ((Opc == Alpha::CPYSS) ? Alpha::LDS : Alpha::LDT);
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(OutReg, RegState::Define | getDeadRegState(isDead))
+ .addFrameIndex(FrameIndex)
+ .addReg(Alpha::F31);
+ }
+ }
+ break;
+ }
+ return NewMI;
+}
+
+static unsigned AlphaRevCondCode(unsigned Opcode) {
+ switch (Opcode) {
+ case Alpha::BEQ: return Alpha::BNE;
+ case Alpha::BNE: return Alpha::BEQ;
+ case Alpha::BGE: return Alpha::BLT;
+ case Alpha::BGT: return Alpha::BLE;
+ case Alpha::BLE: return Alpha::BGT;
+ case Alpha::BLT: return Alpha::BGE;
+ case Alpha::BLBC: return Alpha::BLBS;
+ case Alpha::BLBS: return Alpha::BLBC;
+ case Alpha::FBEQ: return Alpha::FBNE;
+ case Alpha::FBNE: return Alpha::FBEQ;
+ case Alpha::FBGE: return Alpha::FBLT;
+ case Alpha::FBGT: return Alpha::FBLE;
+ case Alpha::FBLE: return Alpha::FBGT;
+ case Alpha::FBLT: return Alpha::FBGE;
+ default:
+ assert(0 && "Unknown opcode");
+ }
+ return 0; // Not reached
+}
+
+// Branch analysis.
+bool AlphaInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == Alpha::BR) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == Alpha::COND_BRANCH_I ||
+ LastInst->getOpcode() == Alpha::COND_BRANCH_F) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with Alpha::BR and Alpha::COND_BRANCH_*, handle it.
+ if ((SecondLastInst->getOpcode() == Alpha::COND_BRANCH_I ||
+ SecondLastInst->getOpcode() == Alpha::COND_BRANCH_F) &&
+ LastInst->getOpcode() == Alpha::BR) {
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two Alpha::BRs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == Alpha::BR &&
+ LastInst->getOpcode() == Alpha::BR) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != Alpha::BR &&
+ I->getOpcode() != Alpha::COND_BRANCH_I &&
+ I->getOpcode() != Alpha::COND_BRANCH_F)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != Alpha::COND_BRANCH_I &&
+ I->getOpcode() != Alpha::COND_BRANCH_F)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+}
+
+bool AlphaInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case Alpha::RETDAG: // Return.
+ case Alpha::RETDAGp:
+ case Alpha::BR: // Uncond branch.
+ case Alpha::JMP: // Indirect branch.
+ return true;
+ default: return false;
+ }
+}
+bool AlphaInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid Alpha branch opcode!");
+ Cond[0].setImm(AlphaRevCondCode(Cond[0].getImm()));
+ return false;
+}
+
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
new file mode 100644
index 000000000000..182aa32f447a
--- /dev/null
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -0,0 +1,97 @@
+//===- AlphaInstrInfo.h - Alpha Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAINSTRUCTIONINFO_H
+#define ALPHAINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "AlphaRegisterInfo.h"
+
+namespace llvm {
+
+class AlphaInstrInfo : public TargetInstrInfoImpl {
+ const AlphaRegisterInfo RI;
+public:
+ AlphaInstrInfo();
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+ bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
new file mode 100644
index 000000000000..e73bdf9f6e91
--- /dev/null
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -0,0 +1,1137 @@
+//===- AlphaInstrInfo.td - The Alpha Instruction Set -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+include "AlphaInstrFormats.td"
+
+//********************
+//Custom DAG Nodes
+//********************
+
+def SDTFPUnaryOpUnC : SDTypeProfile<1, 1, [
+ SDTCisFP<1>, SDTCisFP<0>
+]>;
+def Alpha_cvtqt : SDNode<"AlphaISD::CVTQT_", SDTFPUnaryOpUnC, []>;
+def Alpha_cvtqs : SDNode<"AlphaISD::CVTQS_", SDTFPUnaryOpUnC, []>;
+def Alpha_cvttq : SDNode<"AlphaISD::CVTTQ_" , SDTFPUnaryOp, []>;
+def Alpha_gprello : SDNode<"AlphaISD::GPRelLo", SDTIntBinOp, []>;
+def Alpha_gprelhi : SDNode<"AlphaISD::GPRelHi", SDTIntBinOp, []>;
+def Alpha_rellit : SDNode<"AlphaISD::RelLit", SDTIntBinOp, [SDNPMayLoad]>;
+
+def retflag : SDNode<"AlphaISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_AlphaCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>;
+def SDT_AlphaCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>,
+ SDTCisVT<1, i64> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AlphaCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AlphaCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+//********************
+//Patterns for matching
+//********************
+def invX : SDNodeXForm<imm, [{ //invert
+ return getI64Imm(~N->getZExtValue());
+}]>;
+def negX : SDNodeXForm<imm, [{ //negate
+ return getI64Imm(~N->getZExtValue() + 1);
+}]>;
+def SExt32 : SDNodeXForm<imm, [{ //signed extend int to long
+ return getI64Imm(((int64_t)N->getZExtValue() << 32) >> 32);
+}]>;
+def SExt16 : SDNodeXForm<imm, [{ //signed extend int to long
+ return getI64Imm(((int64_t)N->getZExtValue() << 48) >> 48);
+}]>;
+def LL16 : SDNodeXForm<imm, [{ //lda part of constant
+ return getI64Imm(get_lda16(N->getZExtValue()));
+}]>;
+def LH16 : SDNodeXForm<imm, [{ //ldah part of constant (or more if too big)
+ return getI64Imm(get_ldah16(N->getZExtValue()));
+}]>;
+def iZAPX : SDNodeXForm<and, [{ // get imm to ZAPi
+ ConstantSDNode *RHS = cast<ConstantSDNode>(N->getOperand(1));
+ return getI64Imm(get_zapImm(SDValue(), RHS->getZExtValue()));
+}]>;
+def nearP2X : SDNodeXForm<imm, [{
+ return getI64Imm(Log2_64(getNearPower2((uint64_t)N->getZExtValue())));
+}]>;
+def nearP2RemX : SDNodeXForm<imm, [{
+ uint64_t x =
+ abs64(N->getZExtValue() - getNearPower2((uint64_t)N->getZExtValue()));
+ return getI64Imm(Log2_64(x));
+}]>;
+
+def immUExt8 : PatLeaf<(imm), [{ //imm fits in 8 bit zero extended field
+ return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
+}]>;
+def immUExt8inv : PatLeaf<(imm), [{ //inverted imm fits in 8 bit zero extended field
+ return (uint64_t)~N->getZExtValue() == (uint8_t)~N->getZExtValue();
+}], invX>;
+def immUExt8neg : PatLeaf<(imm), [{ //negated imm fits in 8 bit zero extended field
+ return ((uint64_t)~N->getZExtValue() + 1) ==
+ (uint8_t)((uint64_t)~N->getZExtValue() + 1);
+}], negX>;
+def immSExt16 : PatLeaf<(imm), [{ //imm fits in 16 bit sign extended field
+ return ((int64_t)N->getZExtValue() << 48) >> 48 ==
+ (int64_t)N->getZExtValue();
+}]>;
+def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended field
+ return ((int64_t)N->getZExtValue() << 48) >> 48 ==
+ ((int64_t)N->getZExtValue() << 32) >> 32;
+}], SExt16>;
+
+def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm:$L), [{
+ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!RHS) return 0;
+ uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue());
+ return build != 0;
+}]>;
+
+def immFPZ : PatLeaf<(fpimm), [{ //the only fpconstant nodes are +/- 0.0
+ (void)N; // silence warning.
+ return true;
+}]>;
+
+def immRem1 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,0);}]>;
+def immRem2 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,0);}]>;
+def immRem3 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,0);}]>;
+def immRem4 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,0);}]>;
+def immRem5 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,0);}]>;
+def immRem1n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,1);}]>;
+def immRem2n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,1);}]>;
+def immRem3n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,1);}]>;
+def immRem4n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,1);}]>;
+def immRem5n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,1);}]>;
+
+def immRemP2n : PatLeaf<(imm), [{
+ return isPowerOf2_64(getNearPower2((uint64_t)N->getZExtValue()) -
+ N->getZExtValue());
+}]>;
+def immRemP2 : PatLeaf<(imm), [{
+ return isPowerOf2_64(N->getZExtValue() -
+ getNearPower2((uint64_t)N->getZExtValue()));
+}]>;
+def immUExt8ME : PatLeaf<(imm), [{ //use this imm for mulqi
+ int64_t d = abs64((int64_t)N->getZExtValue() -
+ (int64_t)getNearPower2((uint64_t)N->getZExtValue()));
+ if (isPowerOf2_64(d)) return false;
+ switch (d) {
+ case 1: case 3: case 5: return false;
+ default: return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
+ };
+}]>;
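+
+//Illustrative note: the near-power-of-two XForms and PatLeafs above support
+//strength-reducing multiplication by a constant that sits close to a power of
+//two, e.g. x * 30 can be lowered as (x << 5) - (x << 1) since 30 is 2 away
+//from 32.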
+
+def intop : PatFrag<(ops node:$op), (sext_inreg node:$op, i32)>;
+def add4 : PatFrag<(ops node:$op1, node:$op2),
+ (add (shl node:$op1, 2), node:$op2)>;
+def sub4 : PatFrag<(ops node:$op1, node:$op2),
+ (sub (shl node:$op1, 2), node:$op2)>;
+def add8 : PatFrag<(ops node:$op1, node:$op2),
+ (add (shl node:$op1, 3), node:$op2)>;
+def sub8 : PatFrag<(ops node:$op1, node:$op2),
+ (sub (shl node:$op1, 3), node:$op2)>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class CmpOpFrag<dag res> : PatFrag<(ops node:$R), res>;
+
+//Pseudo ops for selection
+
+def WTF : PseudoInstAlpha<(outs), (ins variable_ops), "#wtf", [], s_pseudo>;
+
+let hasCtrlDep = 1, Defs = [R30], Uses = [R30] in {
+def ADJUSTSTACKUP : PseudoInstAlpha<(outs), (ins s64imm:$amt),
+ "; ADJUP $amt",
+ [(callseq_start timm:$amt)], s_pseudo>;
+def ADJUSTSTACKDOWN : PseudoInstAlpha<(outs), (ins s64imm:$amt1, s64imm:$amt2),
+ "; ADJDOWN $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)], s_pseudo>;
+}
+
+def ALTENT : PseudoInstAlpha<(outs), (ins s64imm:$TARGET), "$$$TARGET..ng:\n", [], s_pseudo>;
+def PCLABEL : PseudoInstAlpha<(outs), (ins s64imm:$num), "PCMARKER_$num:\n",[], s_pseudo>;
+def MEMLABEL : PseudoInstAlpha<(outs), (ins s64imm:$i, s64imm:$j, s64imm:$k, s64imm:$m),
+ "LSMARKER$$$i$$$j$$$k$$$m:", [], s_pseudo>;
+
+
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+def CAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
+def CAS64 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
+
+def LAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+def LAS64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+
+def SWAP32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+def SWAP64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
+ [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
+}
+
+//***********************
+//Real instructions
+//***********************
+
+//Operation Form:
+
+//conditional moves, int
+
+multiclass cmov_inst<bits<7> fun, string asmstr, PatFrag OpNode> {
+def r : OForm4<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
+ [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), GPRC:$RTRUE, GPRC:$RFALSE))], s_cmov>;
+def i : OForm4L<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
+ [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), immUExt8:$RTRUE, GPRC:$RFALSE))], s_cmov>;
+}
+
+defm CMOVEQ : cmov_inst<0x24, "cmoveq", CmpOpFrag<(seteq node:$R, 0)>>;
+defm CMOVNE : cmov_inst<0x26, "cmovne", CmpOpFrag<(setne node:$R, 0)>>;
+defm CMOVLT : cmov_inst<0x44, "cmovlt", CmpOpFrag<(setlt node:$R, 0)>>;
+defm CMOVLE : cmov_inst<0x64, "cmovle", CmpOpFrag<(setle node:$R, 0)>>;
+defm CMOVGT : cmov_inst<0x66, "cmovgt", CmpOpFrag<(setgt node:$R, 0)>>;
+defm CMOVGE : cmov_inst<0x46, "cmovge", CmpOpFrag<(setge node:$R, 0)>>;
+defm CMOVLBC : cmov_inst<0x16, "cmovlbc", CmpOpFrag<(xor node:$R, 1)>>;
+defm CMOVLBS : cmov_inst<0x14, "cmovlbs", CmpOpFrag<(and node:$R, 1)>>;
+
+//General pattern for cmov
+def : Pat<(select GPRC:$which, GPRC:$src1, GPRC:$src2),
+ (CMOVNEr GPRC:$src2, GPRC:$src1, GPRC:$which)>;
+def : Pat<(select GPRC:$which, GPRC:$src1, immUExt8:$src2),
+ (CMOVEQi GPRC:$src1, immUExt8:$src2, GPRC:$which)>;
+
+//Invert sense when we can for constants:
+def : Pat<(select (setne GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVEQi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setgt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVLEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setge GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVLTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setlt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVGEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+def : Pat<(select (setle GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
+ (CMOVGTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
+
+multiclass all_inst<bits<6> opc, bits<7> funl, bits<7> funq,
+ string asmstr, PatFrag OpNode, InstrItinClass itin> {
+ def Lr : OForm< opc, funl, !strconcat(asmstr, "l $RA,$RB,$RC"),
+ [(set GPRC:$RC, (intop (OpNode GPRC:$RA, GPRC:$RB)))], itin>;
+ def Li : OFormL<opc, funl, !strconcat(asmstr, "l $RA,$L,$RC"),
+ [(set GPRC:$RC, (intop (OpNode GPRC:$RA, immUExt8:$L)))], itin>;
+ def Qr : OForm< opc, funq, !strconcat(asmstr, "q $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
+ def Qi : OFormL<opc, funq, !strconcat(asmstr, "q $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
+}
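+//Note (for reference): each defm below expands to four instructions via
+//all_inst: 32-bit register and literal forms (suffixes Lr/Li, wrapped in
+//intop so the result is sign-extended to 64 bits) and 64-bit register and
+//literal forms (suffixes Qr/Qi). That is why code elsewhere refers to e.g.
+//Alpha::ADDLr and Alpha::ADDQr.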
+
+defm MUL : all_inst<0x13, 0x00, 0x20, "mul", BinOpFrag<(mul node:$LHS, node:$RHS)>, s_imul>;
+defm ADD : all_inst<0x10, 0x00, 0x20, "add", BinOpFrag<(add node:$LHS, node:$RHS)>, s_iadd>;
+defm S4ADD : all_inst<0x10, 0x02, 0x22, "s4add", add4, s_iadd>;
+defm S8ADD : all_inst<0x10, 0x12, 0x32, "s8add", add8, s_iadd>;
+defm S4SUB : all_inst<0x10, 0x0B, 0x2B, "s4sub", sub4, s_iadd>;
+defm S8SUB : all_inst<0x10, 0x1B, 0x3B, "s8sub", sub8, s_iadd>;
+defm SUB : all_inst<0x10, 0x09, 0x29, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>, s_iadd>;
+//Constant cases, since legalize turns (sub x, int) into (add x, inv(int) + 1)
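+//For example (illustrative only): (add GPRC:$RA, -5) matches immUExt8neg,
+//whose negX transform recovers the literal 5, so it is selected as
+//(SUBQi GPRC:$RA, 5).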
+def : Pat<(intop (add GPRC:$RA, immUExt8neg:$L)), (SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add GPRC:$RA, immUExt8neg:$L), (SUBQi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(intop (add4 GPRC:$RA, immUExt8neg:$L)), (S4SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add4 GPRC:$RA, immUExt8neg:$L), (S4SUBQi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(intop (add8 GPRC:$RA, immUExt8neg:$L)), (S8SUBLi GPRC:$RA, immUExt8neg:$L)>;
+def : Pat<(add8 GPRC:$RA, immUExt8neg:$L), (S8SUBQi GPRC:$RA, immUExt8neg:$L)>;
+
+multiclass log_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
+def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
+def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
+}
+multiclass inv_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
+def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, (not GPRC:$RB)))], itin>;
+def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
+ [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8inv:$L))], itin>;
+}
+
+defm AND : log_inst<0x11, 0x00, "and", and, s_ilog>;
+defm BIC : inv_inst<0x11, 0x08, "bic", and, s_ilog>;
+defm BIS : log_inst<0x11, 0x20, "bis", or, s_ilog>;
+defm ORNOT : inv_inst<0x11, 0x28, "ornot", or, s_ilog>;
+defm XOR : log_inst<0x11, 0x40, "xor", xor, s_ilog>;
+defm EQV : inv_inst<0x11, 0x48, "eqv", xor, s_ilog>;
+
+defm SL : log_inst<0x12, 0x39, "sll", shl, s_ishf>;
+defm SRA : log_inst<0x12, 0x3c, "sra", sra, s_ishf>;
+defm SRL : log_inst<0x12, 0x34, "srl", srl, s_ishf>;
+defm UMULH : log_inst<0x13, 0x30, "umulh", mulhu, s_imul>;
+
+def CTLZ : OForm2<0x1C, 0x32, "CTLZ $RB,$RC",
+ [(set GPRC:$RC, (ctlz GPRC:$RB))], s_imisc>;
+def CTPOP : OForm2<0x1C, 0x30, "CTPOP $RB,$RC",
+ [(set GPRC:$RC, (ctpop GPRC:$RB))], s_imisc>;
+def CTTZ : OForm2<0x1C, 0x33, "CTTZ $RB,$RC",
+ [(set GPRC:$RC, (cttz GPRC:$RB))], s_imisc>;
+def EXTBL : OForm< 0x12, 0x06, "EXTBL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 255))], s_ishf>;
+def EXTWL : OForm< 0x12, 0x16, "EXTWL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 65535))], s_ishf>;
+def EXTLL : OForm< 0x12, 0x26, "EXTLL $RA,$RB,$RC",
+ [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 4294967295))], s_ishf>;
+def SEXTB : OForm2<0x1C, 0x00, "sextb $RB,$RC",
+ [(set GPRC:$RC, (sext_inreg GPRC:$RB, i8))], s_ishf>;
+def SEXTW : OForm2<0x1C, 0x01, "sextw $RB,$RC",
+ [(set GPRC:$RC, (sext_inreg GPRC:$RB, i16))], s_ishf>;
+
+//def EXTBLi : OFormL<0x12, 0x06, "EXTBL $RA,$L,$RC", []>; //Extract byte low
+//def EXTLH : OForm< 0x12, 0x6A, "EXTLH $RA,$RB,$RC", []>; //Extract longword high
+//def EXTLHi : OFormL<0x12, 0x6A, "EXTLH $RA,$L,$RC", []>; //Extract longword high
+//def EXTLLi : OFormL<0x12, 0x26, "EXTLL $RA,$L,$RC", []>; //Extract longword low
+//def EXTQH : OForm< 0x12, 0x7A, "EXTQH $RA,$RB,$RC", []>; //Extract quadword high
+//def EXTQHi : OFormL<0x12, 0x7A, "EXTQH $RA,$L,$RC", []>; //Extract quadword high
+//def EXTQ : OForm< 0x12, 0x36, "EXTQ $RA,$RB,$RC", []>; //Extract quadword low
+//def EXTQi : OFormL<0x12, 0x36, "EXTQ $RA,$L,$RC", []>; //Extract quadword low
+//def EXTWH : OForm< 0x12, 0x5A, "EXTWH $RA,$RB,$RC", []>; //Extract word high
+//def EXTWHi : OFormL<0x12, 0x5A, "EXTWH $RA,$L,$RC", []>; //Extract word high
+//def EXTWLi : OFormL<0x12, 0x16, "EXTWL $RA,$L,$RC", []>; //Extract word low
+
+//def INSBL : OForm< 0x12, 0x0B, "INSBL $RA,$RB,$RC", []>; //Insert byte low
+//def INSBLi : OFormL<0x12, 0x0B, "INSBL $RA,$L,$RC", []>; //Insert byte low
+//def INSLH : OForm< 0x12, 0x67, "INSLH $RA,$RB,$RC", []>; //Insert longword high
+//def INSLHi : OFormL<0x12, 0x67, "INSLH $RA,$L,$RC", []>; //Insert longword high
+//def INSLL : OForm< 0x12, 0x2B, "INSLL $RA,$RB,$RC", []>; //Insert longword low
+//def INSLLi : OFormL<0x12, 0x2B, "INSLL $RA,$L,$RC", []>; //Insert longword low
+//def INSQH : OForm< 0x12, 0x77, "INSQH $RA,$RB,$RC", []>; //Insert quadword high
+//def INSQHi : OFormL<0x12, 0x77, "INSQH $RA,$L,$RC", []>; //Insert quadword high
+//def INSQL : OForm< 0x12, 0x3B, "INSQL $RA,$RB,$RC", []>; //Insert quadword low
+//def INSQLi : OFormL<0x12, 0x3B, "INSQL $RA,$L,$RC", []>; //Insert quadword low
+//def INSWH : OForm< 0x12, 0x57, "INSWH $RA,$RB,$RC", []>; //Insert word high
+//def INSWHi : OFormL<0x12, 0x57, "INSWH $RA,$L,$RC", []>; //Insert word high
+//def INSWL : OForm< 0x12, 0x1B, "INSWL $RA,$RB,$RC", []>; //Insert word low
+//def INSWLi : OFormL<0x12, 0x1B, "INSWL $RA,$L,$RC", []>; //Insert word low
+
+//def MSKBL : OForm< 0x12, 0x02, "MSKBL $RA,$RB,$RC", []>; //Mask byte low
+//def MSKBLi : OFormL<0x12, 0x02, "MSKBL $RA,$L,$RC", []>; //Mask byte low
+//def MSKLH : OForm< 0x12, 0x62, "MSKLH $RA,$RB,$RC", []>; //Mask longword high
+//def MSKLHi : OFormL<0x12, 0x62, "MSKLH $RA,$L,$RC", []>; //Mask longword high
+//def MSKLL : OForm< 0x12, 0x22, "MSKLL $RA,$RB,$RC", []>; //Mask longword low
+//def MSKLLi : OFormL<0x12, 0x22, "MSKLL $RA,$L,$RC", []>; //Mask longword low
+//def MSKQH : OForm< 0x12, 0x72, "MSKQH $RA,$RB,$RC", []>; //Mask quadword high
+//def MSKQHi : OFormL<0x12, 0x72, "MSKQH $RA,$L,$RC", []>; //Mask quadword high
+//def MSKQL : OForm< 0x12, 0x32, "MSKQL $RA,$RB,$RC", []>; //Mask quadword low
+//def MSKQLi : OFormL<0x12, 0x32, "MSKQL $RA,$L,$RC", []>; //Mask quadword low
+//def MSKWH : OForm< 0x12, 0x52, "MSKWH $RA,$RB,$RC", []>; //Mask word high
+//def MSKWHi : OFormL<0x12, 0x52, "MSKWH $RA,$L,$RC", []>; //Mask word high
+//def MSKWL : OForm< 0x12, 0x12, "MSKWL $RA,$RB,$RC", []>; //Mask word low
+//def MSKWLi : OFormL<0x12, 0x12, "MSKWL $RA,$L,$RC", []>; //Mask word low
+
+def ZAPNOTi : OFormL<0x12, 0x31, "zapnot $RA,$L,$RC", [], s_ishf>;
+
+// Define the pattern that produces ZAPNOTi.
+def : Pat<(zappat:$imm GPRC:$RA),
+ (ZAPNOTi GPRC:$RA, (iZAPX GPRC:$imm))>;
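+//Illustrative note: zapnot copies byte i of $RA to $RC when bit i of the
+//8-bit literal is set and zeroes it otherwise, so e.g. (and GPRC:$RA, 0xffff)
+//can be selected as (ZAPNOTi GPRC:$RA, 3), keeping just the two low bytes.
+//get_zapImm derives that byte mask from the AND mask (and yields 0 when the
+//mask is not byte-granular, in which case zappat fails to match).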
+
+
+//Comparison, int
+//This only uses a fraction of what cmpbge can do, but it still saves something.
+def CMPBGE : OForm< 0x10, 0x0F, "cmpbge $RA,$RB,$RC",
+ [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), (and GPRC:$RB, 255)))], s_ilog>;
+def CMPBGEi : OFormL<0x10, 0x0F, "cmpbge $RA,$L,$RC",
+ [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), immUExt8:$L))], s_ilog>;
+def CMPEQ : OForm< 0x10, 0x2D, "cmpeq $RA,$RB,$RC",
+ [(set GPRC:$RC, (seteq GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPEQi : OFormL<0x10, 0x2D, "cmpeq $RA,$L,$RC",
+ [(set GPRC:$RC, (seteq GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPLE : OForm< 0x10, 0x6D, "cmple $RA,$RB,$RC",
+ [(set GPRC:$RC, (setle GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPLEi : OFormL<0x10, 0x6D, "cmple $RA,$L,$RC",
+ [(set GPRC:$RC, (setle GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPLT : OForm< 0x10, 0x4D, "cmplt $RA,$RB,$RC",
+ [(set GPRC:$RC, (setlt GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPLTi : OFormL<0x10, 0x4D, "cmplt $RA,$L,$RC",
+ [(set GPRC:$RC, (setlt GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPULE : OForm< 0x10, 0x3D, "cmpule $RA,$RB,$RC",
+ [(set GPRC:$RC, (setule GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPULEi : OFormL<0x10, 0x3D, "cmpule $RA,$L,$RC",
+ [(set GPRC:$RC, (setule GPRC:$RA, immUExt8:$L))], s_iadd>;
+def CMPULT : OForm< 0x10, 0x1D, "cmpult $RA,$RB,$RC",
+ [(set GPRC:$RC, (setult GPRC:$RA, GPRC:$RB))], s_iadd>;
+def CMPULTi : OFormL<0x10, 0x1D, "cmpult $RA,$L,$RC",
+ [(set GPRC:$RC, (setult GPRC:$RA, immUExt8:$L))], s_iadd>;
+
+//Patterns for unsupported int comparisons
+def : Pat<(setueq GPRC:$X, GPRC:$Y), (CMPEQ GPRC:$X, GPRC:$Y)>;
+def : Pat<(setueq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>;
+
+def : Pat<(setugt GPRC:$X, GPRC:$Y), (CMPULT GPRC:$Y, GPRC:$X)>;
+def : Pat<(setugt immUExt8:$X, GPRC:$Y), (CMPULTi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setuge GPRC:$X, GPRC:$Y), (CMPULE GPRC:$Y, GPRC:$X)>;
+def : Pat<(setuge immUExt8:$X, GPRC:$Y), (CMPULEi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setgt GPRC:$X, GPRC:$Y), (CMPLT GPRC:$Y, GPRC:$X)>;
+def : Pat<(setgt immUExt8:$X, GPRC:$Y), (CMPLTi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setge GPRC:$X, GPRC:$Y), (CMPLE GPRC:$Y, GPRC:$X)>;
+def : Pat<(setge immUExt8:$X, GPRC:$Y), (CMPLEi GPRC:$Y, immUExt8:$X)>;
+
+def : Pat<(setne GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
+def : Pat<(setne GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>;
+
+def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
+def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0)>;
+
+
+let isReturn = 1, isTerminator = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
+ def RETDAG : MbrForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", s_jsr>; //Return from subroutine
+ def RETDAGp : MbrpForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, Ra = 31, disp = 0 in
+def JMP : MbrpForm< 0x1A, 0x00, (ops GPRC:$RS), "jmp $$31,($RS),0",
+ [(brind GPRC:$RS)], s_jsr>; //Jump
+
+let isCall = 1, Ra = 26,
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R29] in {
+ def BSR : BFormD<0x34, "bsr $$26,$$$DISP..ng", [], s_jsr>; //Branch to subroutine
+}
+let isCall = 1, Ra = 26, Rb = 27, disp = 0,
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in {
+ def JSR : MbrForm< 0x1A, 0x01, (ops ), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine
+}
+
+let isCall = 1, Ra = 23, Rb = 27, disp = 0,
+ Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in
+ def JSRs : MbrForm< 0x1A, 0x01, (ops ), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem
+
+
+def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ops GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return
+
+
+let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in {
+def LDQ : MForm<0x29, 1, "ldq $RA,$DISP($RB)",
+ [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDQr : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDL : MForm<0x28, 1, "ldl $RA,$DISP($RB)",
+ [(set GPRC:$RA, (sextloadi32 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDLr : MForm<0x28, 1, "ldl $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (sextloadi32 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDBU : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)",
+ [(set GPRC:$RA, (zextloadi8 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDBUr : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (zextloadi8 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+def LDWU : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)",
+ [(set GPRC:$RA, (zextloadi16 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
+def LDWUr : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (zextloadi16 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
+}
+
+
+let OutOperandList = (ops), InOperandList = (ops GPRC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STB : MForm<0x0E, 0, "stb $RA,$DISP($RB)",
+ [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STBr : MForm<0x0E, 0, "stb $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei8 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STW : MForm<0x0D, 0, "stw $RA,$DISP($RB)",
+ [(truncstorei16 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STWr : MForm<0x0D, 0, "stw $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei16 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STL : MForm<0x2C, 0, "stl $RA,$DISP($RB)",
+ [(truncstorei32 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STLr : MForm<0x2C, 0, "stl $RA,$DISP($RB)\t\t!gprellow",
+ [(truncstorei32 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+def STQ : MForm<0x2D, 0, "stq $RA,$DISP($RB)",
+ [(store GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
+def STQr : MForm<0x2D, 0, "stq $RA,$DISP($RB)\t\t!gprellow",
+ [(store GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
+}
+
+//Load address
+let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in {
+def LDA : MForm<0x08, 0, "lda $RA,$DISP($RB)",
+ [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>;
+def LDAr : MForm<0x08, 0, "lda $RA,$DISP($RB)\t\t!gprellow",
+ [(set GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address
+def LDAH : MForm<0x09, 0, "ldah $RA,$DISP($RB)",
+ [], s_lda>; //Load address high
+def LDAHr : MForm<0x09, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh",
+ [(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address high
+}
+
+let OutOperandList = (ops), InOperandList = (ops F4RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STS : MForm<0x26, 0, "sts $RA,$DISP($RB)",
+ [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
+def STSr : MForm<0x26, 0, "sts $RA,$DISP($RB)\t\t!gprellow",
+ [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
+}
+let OutOperandList = (ops F4RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in {
+def LDS : MForm<0x22, 1, "lds $RA,$DISP($RB)",
+ [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
+def LDSr : MForm<0x22, 1, "lds $RA,$DISP($RB)\t\t!gprellow",
+ [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
+}
+let OutOperandList = (ops), InOperandList = (ops F8RC:$RA, s64imm:$DISP, GPRC:$RB) in {
+def STT : MForm<0x27, 0, "stt $RA,$DISP($RB)",
+ [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
+def STTr : MForm<0x27, 0, "stt $RA,$DISP($RB)\t\t!gprellow",
+ [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
+}
+let OutOperandList = (ops F8RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in {
+def LDT : MForm<0x23, 1, "ldt $RA,$DISP($RB)",
+ [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
+def LDTr : MForm<0x23, 1, "ldt $RA,$DISP($RB)\t\t!gprellow",
+ [(set F8RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
+}
+
+
+//constpool rels
+def : Pat<(i64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDQr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (sextloadi32 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDLr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (zextloadi8 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDBUr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (zextloadi16 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDWUr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprello tconstpool:$DISP, GPRC:$RB)),
+ (LDAr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprelhi tconstpool:$DISP, GPRC:$RB)),
+ (LDAHr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(f32 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDSr tconstpool:$DISP, GPRC:$RB)>;
+def : Pat<(f64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
+ (LDTr tconstpool:$DISP, GPRC:$RB)>;
+
+//jumptable rels
+def : Pat<(i64 (Alpha_gprelhi tjumptable:$DISP, GPRC:$RB)),
+ (LDAHr tjumptable:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (Alpha_gprello tjumptable:$DISP, GPRC:$RB)),
+ (LDAr tjumptable:$DISP, GPRC:$RB)>;
+
+
+//misc ext patterns
+def : Pat<(i64 (extloadi8 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDBU immSExt16:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (extloadi16 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDWU immSExt16:$DISP, GPRC:$RB)>;
+def : Pat<(i64 (extloadi32 (add GPRC:$RB, immSExt16:$DISP))),
+ (LDL immSExt16:$DISP, GPRC:$RB)>;
+
+//0 disp patterns
+def : Pat<(i64 (load GPRC:$addr)),
+ (LDQ 0, GPRC:$addr)>;
+def : Pat<(f64 (load GPRC:$addr)),
+ (LDT 0, GPRC:$addr)>;
+def : Pat<(f32 (load GPRC:$addr)),
+ (LDS 0, GPRC:$addr)>;
+def : Pat<(i64 (sextloadi32 GPRC:$addr)),
+ (LDL 0, GPRC:$addr)>;
+def : Pat<(i64 (zextloadi16 GPRC:$addr)),
+ (LDWU 0, GPRC:$addr)>;
+def : Pat<(i64 (zextloadi8 GPRC:$addr)),
+ (LDBU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi8 GPRC:$addr)),
+ (LDBU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi16 GPRC:$addr)),
+ (LDWU 0, GPRC:$addr)>;
+def : Pat<(i64 (extloadi32 GPRC:$addr)),
+ (LDL 0, GPRC:$addr)>;
+
+def : Pat<(store GPRC:$DATA, GPRC:$addr),
+ (STQ GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F8RC:$DATA, GPRC:$addr),
+ (STT F8RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(store F4RC:$DATA, GPRC:$addr),
+ (STS F4RC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei32 GPRC:$DATA, GPRC:$addr),
+ (STL GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei16 GPRC:$DATA, GPRC:$addr),
+ (STW GPRC:$DATA, 0, GPRC:$addr)>;
+def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr),
+ (STB GPRC:$DATA, 0, GPRC:$addr)>;
+
+
+//Load address, relocated gpdisp form
+let OutOperandList = (ops GPRC:$RA),
+ InOperandList = (ops s16imm:$DISP, GPRC:$RB, s16imm:$NUM),
+ mayLoad = 1 in {
+def LDAg : MForm<0x08, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
+def LDAHg : MForm<0x09, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
+}
+
+//Load quad, relocated literal form
+let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in
+def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal",
+ [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>;
+def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
+ (LDQl texternalsym:$ext, GPRC:$RB)>;
+
+let OutOperandList = (outs GPRC:$RR),
+ InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB),
+ Constraints = "$RA = $RR",
+ DisableEncoding = "$RR" in {
+def STQ_C : MForm<0x2F, 0, "stq_c $RA,$DISP($RB)", [], s_ist>;
+def STL_C : MForm<0x2E, 0, "stl_c $RA,$DISP($RB)", [], s_ist>;
+}
+let OutOperandList = (ops GPRC:$RA),
+ InOperandList = (ops s64imm:$DISP, GPRC:$RB),
+ mayLoad = 1 in {
+def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>;
+def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>;
+}
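+//LDx_L / STx_C are Alpha's load-locked / store-conditional pair. STx_C writes
+//its success flag back into $RA, which is why $RA is tied to the otherwise
+//unencoded $RR result above.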
+
+def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter
+def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier
+def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier
+
+def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 1), (i64 imm:$dev)),
+ (WMB)>;
+def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 imm:$ss), (i64 imm:$dev)),
+ (MB)>;
+
+//Basic Floating point ops
+
+//Floats
+
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in
+def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC",
+ [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>;
+
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RA, F4RC:$RB) in {
+def ADDS : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>;
+def SUBS : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fsub F4RC:$RA, F4RC:$RB))], s_fadd>;
+def DIVS : FPForm<0x16, 0x583, "divs/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fdiv F4RC:$RA, F4RC:$RB))], s_fdivs>;
+def MULS : FPForm<0x16, 0x582, "muls/su $RA,$RB,$RC",
+ [(set F4RC:$RC, (fmul F4RC:$RA, F4RC:$RB))], s_fmul>;
+
+def CPYSS : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F4RC:$RC, (fcopysign F4RC:$RB, F4RC:$RA))], s_fadd>;
+def CPYSES : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F4RC:$RA)))], s_fadd>;
+}
+
+//Doubles
+
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC",
+ [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>;
+
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RA, F8RC:$RB) in {
+def ADDT : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>;
+def SUBT : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fsub F8RC:$RA, F8RC:$RB))], s_fadd>;
+def DIVT : FPForm<0x16, 0x5A3, "divt/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fdiv F8RC:$RA, F8RC:$RB))], s_fdivt>;
+def MULT : FPForm<0x16, 0x5A2, "mult/su $RA,$RB,$RC",
+ [(set F8RC:$RC, (fmul F8RC:$RA, F8RC:$RB))], s_fmul>;
+
+def CPYST : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F8RC:$RC, (fcopysign F8RC:$RB, F8RC:$RA))], s_fadd>;
+def CPYSET : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNT : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F8RC:$RA)))], s_fadd>;
+
+def CMPTEQ : FPForm<0x16, 0x5A5, "cmpteq/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (seteq F8RC:$RA, F8RC:$RB))]>;
+def CMPTLE : FPForm<0x16, 0x5A7, "cmptle/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setle F8RC:$RA, F8RC:$RB))]>;
+def CMPTLT : FPForm<0x16, 0x5A6, "cmptlt/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setlt F8RC:$RA, F8RC:$RB))]>;
+def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>;
+// [(set F8RC:$RC, (setuo F8RC:$RA, F8RC:$RB))]>;
+}
+
+//More CPYS forms:
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RA, F8RC:$RB) in {
+def CPYSTs : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>;
+def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>;
+}
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RA, F4RC:$RB) in {
+def CPYSSt : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
+ [(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>;
+def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
+def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
+ [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F8RC:$RA)))], s_fadd>;
+}
+
+//conditional moves, floats
+let OutOperandList = (ops F4RC:$RDEST), InOperandList = (ops F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
+ isTwoAddress = 1 in {
+def FCMOVEQS : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if = zero
+def FCMOVGES : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if >= zero
+def FCMOVGTS : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if > zero
+def FCMOVLES : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if <= zero
+def FCMOVLTS : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; // FCMOVE if < zero
+def FCMOVNES : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if != zero
+}
+//conditional moves, doubles
+let OutOperandList = (ops F8RC:$RDEST), InOperandList = (ops F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
+ isTwoAddress = 1 in {
+def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVLET : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVLTT : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+def FCMOVNET : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
+}
+
+//misc FP selects
+//Select double
+
+def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
+ (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+
+//Select single
+def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
+
+def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
+
+def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
+ (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
+
+
+
+let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F4RC:$RA), Fb = 31 in
+def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC",[], s_ftoi>; //Floating to integer move, S_floating
+let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F8RC:$RA), Fb = 31 in
+def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC",
+ [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in
+def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC",[], s_itof>; //Integer to floating move, S_floating
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in
+def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC",
+ [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move
+
+
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC",
+ [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>;
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC",
+ [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>;
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC",
+ [(set F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>;
+let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in
+def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC",
+ [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>;
+let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
+def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC",
+ [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>;
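+//Note that cvttq leaves its integer result in an FP register (hence the
+//F8RC -> F8RC typing above); ftoit is then used to move the value into a GPR.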
+
+
+/////////////////////////////////////////////////////////
+//Branching
+/////////////////////////////////////////////////////////
+class br_icc<bits<6> opc, string asmstr>
+ : BFormN<opc, (ops u64imm:$opc, GPRC:$R, target:$dst),
+ !strconcat(asmstr, " $R,$dst"), s_icbr>;
+class br_fcc<bits<6> opc, string asmstr>
+ : BFormN<opc, (ops u64imm:$opc, F8RC:$R, target:$dst),
+ !strconcat(asmstr, " $R,$dst"), s_fbr>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+let Ra = 31 in
+def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>;
+
+def COND_BRANCH_I : BFormN<0, (ops u64imm:$opc, GPRC:$R, target:$dst),
+ "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst",
+ s_icbr>;
+def COND_BRANCH_F : BFormN<0, (ops u64imm:$opc, F8RC:$R, target:$dst),
+ "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst",
+ s_fbr>;
+//Branches, int
+def BEQ : br_icc<0x39, "beq">;
+def BGE : br_icc<0x3E, "bge">;
+def BGT : br_icc<0x3F, "bgt">;
+def BLBC : br_icc<0x38, "blbc">;
+def BLBS : br_icc<0x3C, "blbs">;
+def BLE : br_icc<0x3B, "ble">;
+def BLT : br_icc<0x3A, "blt">;
+def BNE : br_icc<0x3D, "bne">;
+
+//Branches, float
+def FBEQ : br_fcc<0x31, "fbeq">;
+def FBGE : br_fcc<0x36, "fbge">;
+def FBGT : br_fcc<0x37, "fbgt">;
+def FBLE : br_fcc<0x33, "fble">;
+def FBLT : br_fcc<0x32, "fblt">;
+def FBNE : br_fcc<0x35, "fbne">;
+}
+
+//An ugly trick: map the condition number used by COND_BRANCH_* onto the real
+//branch opcode as an immediate
+def immBRCond : SDNodeXForm<imm, [{
+ switch((uint64_t)N->getZExtValue()) {
+ default: assert(0 && "Unknown branch type");
+ case 0: return getI64Imm(Alpha::BEQ);
+ case 1: return getI64Imm(Alpha::BNE);
+ case 2: return getI64Imm(Alpha::BGE);
+ case 3: return getI64Imm(Alpha::BGT);
+ case 4: return getI64Imm(Alpha::BLE);
+ case 5: return getI64Imm(Alpha::BLT);
+ case 6: return getI64Imm(Alpha::BLBS);
+ case 7: return getI64Imm(Alpha::BLBC);
+ case 20: return getI64Imm(Alpha::FBEQ);
+ case 21: return getI64Imm(Alpha::FBNE);
+ case 22: return getI64Imm(Alpha::FBGE);
+ case 23: return getI64Imm(Alpha::FBGT);
+ case 24: return getI64Imm(Alpha::FBLE);
+ case 25: return getI64Imm(Alpha::FBLT);
+ }
+}]>;
+
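+//The CMPT* compares above write 2.0 for true and a true zero for false, so the
+//FP brcond patterns below branch on that result with FBNE (code 21) or FBEQ
+//(code 20).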
+//Int cond patterns
+def : Pat<(brcond (seteq GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setge GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 2), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setgt GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 3), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (and GPRC:$RA, 1), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 6), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setle GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 4), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setlt GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 5), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, 0), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
+
+def : Pat<(brcond GPRC:$RA, bb:$DISP),
+ (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, GPRC:$RB), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), (CMPEQ GPRC:$RA, GPRC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setne GPRC:$RA, immUExt8:$L), bb:$DISP),
+ (COND_BRANCH_I (immBRCond 0), (CMPEQi GPRC:$RA, immUExt8:$L), bb:$DISP)>;
+
+//FP cond patterns
+def : Pat<(brcond (seteq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setne F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setgt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setle F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA, bb:$DISP)>;
+def : Pat<(brcond (setlt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA, bb:$DISP)>;
+
+
+def : Pat<(brcond (seteq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setoeq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setueq F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setlt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setolt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setult F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setle F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setole F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setule F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+def : Pat<(brcond (setgt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setogt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setugt F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+
+def : Pat<(brcond (setge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setoge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+def : Pat<(brcond (setuge F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
+
+def : Pat<(brcond (setne F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setone F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+def : Pat<(brcond (setune F8RC:$RA, F8RC:$RB), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
+
+
+def : Pat<(brcond (setoeq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setueq F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setoge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setuge F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setogt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setugt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setole F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setule F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setolt F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setult F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
+
+def : Pat<(brcond (setone F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
+def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
+ (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
+
+//End Branches
+
+//S_floating : IEEE Single
+//T_floating : IEEE Double
+
+//Unused instructions
+//Mnemonic Format Opcode Description
+//CALL_PAL Pcd 00 Trap to PALcode
+//ECB Mfc 18.E800 Evict cache block
+//EXCB Mfc 18.0400 Exception barrier
+//FETCH Mfc 18.8000 Prefetch data
+//FETCH_M Mfc 18.A000 Prefetch data, modify intent
+//LDQ_U Mem 0B Load unaligned quadword
+//MB Mfc 18.4000 Memory barrier
+//STQ_U Mem 0F Store unaligned quadword
+//TRAPB Mfc 18.0000 Trap barrier
+//WH64     Mfc   18.F800  Write hint - 64 bytes
+//WMB Mfc 18.4400 Write memory barrier
+//MF_FPCR F-P 17.025 Move from FPCR
+//MT_FPCR F-P 17.024 Move to FPCR
+//These are in the Multimedia extensions, so let's not use them yet
+//def MAXSB8 : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">; //Vector signed byte maximum
+//def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">; //Vector signed word maximum
+//def MAXUB8 : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">; //Vector unsigned byte maximum
+//def MAXUW4 : OForm< 0x1C, 0x3D, "MAXUW4 $RA,$RB,$RC">; //Vector unsigned word maximum
+//def MINSB8 : OForm< 0x1C, 0x38, "MINSB8 $RA,$RB,$RC">; //Vector signed byte minimum
+//def MINSW4 : OForm< 0x1C, 0x39, "MINSW4 $RA,$RB,$RC">; //Vector signed word minimum
+//def MINUB8 : OForm< 0x1C, 0x3A, "MINUB8 $RA,$RB,$RC">; //Vector unsigned byte minimum
+//def MINUW4 : OForm< 0x1C, 0x3B, "MINUW4 $RA,$RB,$RC">; //Vector unsigned word minimum
+//def PERR : OForm< 0x1C, 0x31, "PERR $RA,$RB,$RC">; //Pixel error
+//def PKLB : OForm< 0x1C, 0x37, "PKLB $RA,$RB,$RC">; //Pack longwords to bytes
+//def PKWB : OForm<0x1C, 0x36, "PKWB $RA,$RB,$RC">; //Pack words to bytes
+//def UNPKBL : OForm< 0x1C, 0x35, "UNPKBL $RA,$RB,$RC">; //Unpack bytes to longwords
+//def UNPKBW : OForm< 0x1C, 0x34, "UNPKBW $RA,$RB,$RC">; //Unpack bytes to words
+//CVTLQ F-P 17.010 Convert longword to quadword
+//CVTQL F-P 17.030 Convert quadword to longword
+
+
+//Constant handling
+
+def immConst2Part : PatLeaf<(imm), [{
+ //true if imm fits in a LDAH LDA pair
+ int64_t val = (int64_t)N->getZExtValue();
+ return (val <= IMM_FULLHIGH && val >= IMM_FULLLOW);
+}]>;
+def immConst2PartInt : PatLeaf<(imm), [{
+ //true if imm fits in a LDAH LDA pair with zeroext
+ uint64_t uval = N->getZExtValue();
+ int32_t val32 = (int32_t)uval;
+ return ((uval >> 32) == 0 && //empty upper bits
+ val32 <= IMM_FULLHIGH);
+// val32 >= IMM_FULLLOW + IMM_LOW * IMM_MULT); //Always True
+}], SExt32>;
+
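+//Constants that fit in an LDAH/LDA pair are built as hi*65536 + lo with both
+//halves signed 16-bit quantities; e.g. 100000 = 2*65536 - 31072 becomes
+//ldah $r,2($31) ; lda $r,-31072($r).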
+def : Pat<(i64 immConst2Part:$imm),
+ (LDA (LL16 immConst2Part:$imm), (LDAH (LH16 immConst2Part:$imm), R31))>;
+
+def : Pat<(i64 immSExt16:$imm),
+ (LDA immSExt16:$imm, R31)>;
+
+def : Pat<(i64 immSExt16int:$imm),
+ (ZAPNOTi (LDA (SExt16 immSExt16int:$imm), R31), 15)>;
+def : Pat<(i64 immConst2PartInt:$imm),
+ (ZAPNOTi (LDA (LL16 (SExt32 immConst2PartInt:$imm)),
+ (LDAH (LH16 (SExt32 immConst2PartInt:$imm)), R31)), 15)>;
+
+
+//TODO: I want to just define these like this!
+//def : Pat<(i64 0),
+// (R31)>;
+//def : Pat<(f64 0.0),
+// (F31)>;
+//def : Pat<(f64 -0.0),
+// (CPYSNT F31, F31)>;
+//def : Pat<(f32 0.0),
+// (F31)>;
+//def : Pat<(f32 -0.0),
+// (CPYSNS F31, F31)>;
+
+//Misc Patterns:
+
+def : Pat<(sext_inreg GPRC:$RB, i32),
+ (ADDLi GPRC:$RB, 0)>;
+
+def : Pat<(fabs F8RC:$RB),
+ (CPYST F31, F8RC:$RB)>;
+def : Pat<(fabs F4RC:$RB),
+ (CPYSS F31, F4RC:$RB)>;
+def : Pat<(fneg F8RC:$RB),
+ (CPYSNT F8RC:$RB, F8RC:$RB)>;
+def : Pat<(fneg F4RC:$RB),
+ (CPYSNS F4RC:$RB, F4RC:$RB)>;
+
+def : Pat<(fcopysign F4RC:$A, (fneg F4RC:$B)),
+ (CPYSNS F4RC:$B, F4RC:$A)>;
+def : Pat<(fcopysign F8RC:$A, (fneg F8RC:$B)),
+ (CPYSNT F8RC:$B, F8RC:$A)>;
+def : Pat<(fcopysign F4RC:$A, (fneg F8RC:$B)),
+ (CPYSNSt F8RC:$B, F4RC:$A)>;
+def : Pat<(fcopysign F8RC:$A, (fneg F4RC:$B)),
+ (CPYSNTs F4RC:$B, F8RC:$A)>;
+
+//Yes, signed multiply high is ugly
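+//Alpha only has an unsigned multiply-high, so the pattern below uses the
+//identity mulhs(a,b) = umulh(a,b) - (a < 0 ? b : 0) - (b < 0 ? a : 0),
+//with CMOVGE producing the two correction terms.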
+def : Pat<(mulhs GPRC:$RA, GPRC:$RB),
+ (SUBQr (UMULHr GPRC:$RA, GPRC:$RB), (ADDQr (CMOVGEr GPRC:$RB, R31, GPRC:$RA),
+ (CMOVGEr GPRC:$RA, R31, GPRC:$RB)))>;
+
+//Strength-reduction tricks for multiplies by small constants:
+let AddedComplexity = 1 in {
+def : Pat<(mul GPRC:$RA, 5), (S4ADDQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 9), (S8ADDQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 3), (S4SUBQr GPRC:$RA, GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, 7), (S8SUBQr GPRC:$RA, GPRC:$RA)>;
+
+//slight tree expansion if we are multiplying near to a power of 2
+//n is above a power of 2
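+//e.g. x*17 = (x<<4) + x (immRem1), x*20 = 4*x + (x<<4) (immRem4), and
+//x*24 = (x<<4) + (x<<3) (immRemP2), assuming nearP2X/nearP2RemX give the
+//shift amounts for the nearby powers of two.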
+def : Pat<(mul GPRC:$RA, immRem1:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem1:$imm)), GPRC:$RA)>;
+def : Pat<(mul GPRC:$RA, immRem2:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem2:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem3:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem3:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRem4:$imm),
+ (S4ADDQr GPRC:$RA, (SLr GPRC:$RA, (nearP2X immRem4:$imm)))>;
+def : Pat<(mul GPRC:$RA, immRem5:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRem5:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
+def : Pat<(mul GPRC:$RA, immRemP2:$imm),
+ (ADDQr (SLr GPRC:$RA, (nearP2X immRemP2:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2:$imm)))>;
+
+//n is below a power of 2
+//FIXME: figure out why something is truncating the imm to 32bits
+// this will fix 2007-11-27-mulneg3
+//def : Pat<(mul GPRC:$RA, immRem1n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem1n:$imm)), GPRC:$RA)>;
+//def : Pat<(mul GPRC:$RA, immRem2n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem2n:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRem3n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem3n:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRem4n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem4n:$imm)), (SLi GPRC:$RA, 2))>;
+//def : Pat<(mul GPRC:$RA, immRem5n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRem5n:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
+//def : Pat<(mul GPRC:$RA, immRemP2n:$imm),
+// (SUBQr (SLr GPRC:$RA, (nearP2X immRemP2n:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2n:$imm)))>;
+} //Added complexity
diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp
new file mode 100644
index 000000000000..3fecb19d73b7
--- /dev/null
+++ b/lib/Target/Alpha/AlphaJITInfo.cpp
@@ -0,0 +1,307 @@
+//===-- AlphaJITInfo.cpp - Implement the JIT interfaces for the Alpha ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the Alpha target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "AlphaJITInfo.h"
+#include "AlphaRelocations.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/Support/Debug.h"
+#include <cstdlib>
+#include <map>
+using namespace llvm;
+
+#define BUILD_OFormatI(Op, RA, LIT, FUN, RC) \
+ ((Op << 26) | (RA << 21) | (LIT << 13) | (1 << 12) | (FUN << 5) | (RC))
+#define BUILD_OFormat(Op, RA, RB, FUN, RC) \
+ ((Op << 26) | (RA << 21) | (RB << 16) | (FUN << 5) | (RC))
+
+#define BUILD_LDA(RD, RS, IMM16) \
+ ((0x08 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+#define BUILD_LDAH(RD, RS, IMM16) \
+ ((0x09 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+
+#define BUILD_LDQ(RD, RS, IMM16) \
+ ((0x29 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 0xFFFF))
+
+#define BUILD_JMP(RD, RS, IMM16) \
+ ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x00 << 14) | ((IMM16) & 0x3FFF))
+#define BUILD_JSR(RD, RS, IMM16) \
+ ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x01 << 14) | ((IMM16) & 0x3FFF))
+
+#define BUILD_SLLi(RD, RS, IMM8) \
+ (BUILD_OFormatI(0x12, RS, IMM8, 0x39, RD))
+
+#define BUILD_ORi(RD, RS, IMM8) \
+ (BUILD_OFormatI(0x11, RS, IMM8, 0x20, RD))
+
+#define BUILD_OR(RD, RS, RT) \
+ (BUILD_OFormat(0x11, RS, RT, 0x20, RD))
+
+
+
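+//The stub emitted below is 19 32-bit words:
+//  word 0       bis $27,$27,$0        (save the stub's entry address, pv, for the callback)
+//  words 1-16   8 x { sll $27,8,$27 ; bis $27,#byte,$27 }  (shift in the 64-bit target, MSB first)
+//  word 17      jmp $31,($27)         (jump to the target with pv set, ra untouched)
+//  word 18      0x00FFFFFF            (marker the callback checks to recognize a stub)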
+static void EmitBranchToAt(void *At, void *To) {
+ unsigned long Fn = (unsigned long)To;
+
+ unsigned *AtI = (unsigned*)At;
+
+ AtI[0] = BUILD_OR(0, 27, 27);
+
+ DOUT << "Stub targeting " << To << "\n";
+
+ for (int x = 1; x <= 8; ++x) {
+ AtI[2*x - 1] = BUILD_SLLi(27,27,8);
+ unsigned d = (Fn >> (64 - 8 * x)) & 0x00FF;
+    //DOUT << "outputting " << hex << d << dec << "\n";
+ AtI[2*x] = BUILD_ORi(27, 27, d);
+ }
+ AtI[17] = BUILD_JMP(31,27,0); //jump, preserving ra, and setting pv
+ AtI[18] = 0x00FFFFFF; //mark this as a stub
+}
+
+void AlphaJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ //FIXME
+ assert(0);
+}
+
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+//static AlphaJITInfo* AlphaJTI;
+
+extern "C" {
+#ifdef __alpha
+
+ void AlphaCompilationCallbackC(long* oldpv, void* CameFromStub)
+ {
+ void* Target = JITCompilerFunction(CameFromStub);
+
+ //rewrite the stub to an unconditional branch
+ if (((unsigned*)CameFromStub)[18] == 0x00FFFFFF) {
+ DOUT << "Came from a stub, rewriting\n";
+ EmitBranchToAt(CameFromStub, Target);
+ } else {
+ DOUT << "confused, didn't come from stub at " << CameFromStub
+ << " old jump vector " << oldpv
+ << " new jump vector " << Target << "\n";
+ }
+
+ //Change pv to new Target
+ *oldpv = (long)Target;
+ }
+
+ void AlphaCompilationCallback(void);
+
+ asm(
+ ".text\n"
+   ".globl AlphaCompilationCallbackC\n"
+ ".align 4\n"
+ ".globl AlphaCompilationCallback\n"
+ ".ent AlphaCompilationCallback\n"
+"AlphaCompilationCallback:\n"
+ // //get JIT's GOT
+ "ldgp $29, 0($27)\n"
+ //Save args, callee saved, and perhaps others?
+ //args: $16-$21 $f16-$f21 (12)
+ //callee: $9-$14 $f2-$f9 (14)
+ //others: fp:$15 ra:$26 pv:$27 (3)
+ "lda $30, -232($30)\n"
+ "stq $16, 0($30)\n"
+ "stq $17, 8($30)\n"
+ "stq $18, 16($30)\n"
+ "stq $19, 24($30)\n"
+ "stq $20, 32($30)\n"
+ "stq $21, 40($30)\n"
+ "stt $f16, 48($30)\n"
+ "stt $f17, 56($30)\n"
+ "stt $f18, 64($30)\n"
+ "stt $f19, 72($30)\n"
+ "stt $f20, 80($30)\n"
+ "stt $f21, 88($30)\n"
+ "stq $9, 96($30)\n"
+ "stq $10, 104($30)\n"
+ "stq $11, 112($30)\n"
+ "stq $12, 120($30)\n"
+ "stq $13, 128($30)\n"
+ "stq $14, 136($30)\n"
+ "stt $f2, 144($30)\n"
+ "stt $f3, 152($30)\n"
+ "stt $f4, 160($30)\n"
+ "stt $f5, 168($30)\n"
+ "stt $f6, 176($30)\n"
+ "stt $f7, 184($30)\n"
+ "stt $f8, 192($30)\n"
+ "stt $f9, 200($30)\n"
+ "stq $15, 208($30)\n"
+ "stq $26, 216($30)\n"
+ "stq $27, 224($30)\n"
+
+ "addq $30, 224, $16\n" //pass the addr of saved pv as the first arg
+    "bis $0, $0, $17\n"    //pass what should be the stub address (left in $0 by the stub) as the second arg
+ "jsr $26, AlphaCompilationCallbackC\n" //call without saving ra
+
+ "ldq $16, 0($30)\n"
+ "ldq $17, 8($30)\n"
+ "ldq $18, 16($30)\n"
+ "ldq $19, 24($30)\n"
+ "ldq $20, 32($30)\n"
+ "ldq $21, 40($30)\n"
+ "ldt $f16, 48($30)\n"
+ "ldt $f17, 56($30)\n"
+ "ldt $f18, 64($30)\n"
+ "ldt $f19, 72($30)\n"
+ "ldt $f20, 80($30)\n"
+ "ldt $f21, 88($30)\n"
+ "ldq $9, 96($30)\n"
+ "ldq $10, 104($30)\n"
+ "ldq $11, 112($30)\n"
+ "ldq $12, 120($30)\n"
+ "ldq $13, 128($30)\n"
+ "ldq $14, 136($30)\n"
+ "ldt $f2, 144($30)\n"
+ "ldt $f3, 152($30)\n"
+ "ldt $f4, 160($30)\n"
+ "ldt $f5, 168($30)\n"
+ "ldt $f6, 176($30)\n"
+ "ldt $f7, 184($30)\n"
+ "ldt $f8, 192($30)\n"
+ "ldt $f9, 200($30)\n"
+ "ldq $15, 208($30)\n"
+ "ldq $26, 216($30)\n"
+ "ldq $27, 224($30)\n" //this was updated in the callback with the target
+
+ "lda $30, 232($30)\n" //restore sp
+ "jmp $31, ($27)\n" //jump to the new function
+ ".end AlphaCompilationCallback\n"
+ );
+#else
+ void AlphaCompilationCallback() {
+ cerr << "Cannot call AlphaCompilationCallback() on a non-Alpha arch!\n";
+ abort();
+ }
+#endif
+}
+
+void *AlphaJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ //assert(Fn == AlphaCompilationCallback && "Where are you going?\n");
+ //Do things in a stupid slow way!
+ JCE.startGVStub(F, 19*4);
+ void* Addr = (void*)(intptr_t)JCE.getCurrentPCValue();
+ for (int x = 0; x < 19; ++ x)
+ JCE.emitWordLE(0);
+ EmitBranchToAt(Addr, Fn);
+ DOUT << "Emitting Stub to " << Fn << " at [" << Addr << "]\n";
+ return JCE.finishGVStub(F);
+}
+
+TargetJITInfo::LazyResolverFn
+AlphaJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ // setZerothGOTEntry((void*)AlphaCompilationCallback);
+ return AlphaCompilationCallback;
+}
+
+//These describe LDAx
+static const int IMM_LOW = -32768;
+static const int IMM_HIGH = 32767;
+static const int IMM_MULT = 65536;
+
+static long getUpper16(long l)
+{
+ long y = l / IMM_MULT;
+ if (l % IMM_MULT > IMM_HIGH)
+ ++y;
+ if (l % IMM_MULT < IMM_LOW)
+ --y;
+ assert((short)y == y && "displacement out of range");
+ return y;
+}
+
+static long getLower16(long l)
+{
+ long h = getUpper16(l);
+ long y = l - h * IMM_MULT;
+ assert(y == (short)y && "Displacement out of range");
+ return y;
+}
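+//getUpper16/getLower16 split a displacement so that upper*65536 + lower == l,
+//with lower kept in [IMM_LOW, IMM_HIGH]. For example, l = 32768: the remainder
+//32768 exceeds IMM_HIGH, so upper becomes 1 and lower = 32768 - 65536 = -32768.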
+
+void AlphaJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+  //Because gpdist relocations come in pairs and are relative to the pc of the
+  //first instruction, we need to keep some state across relocations.
+
+ static std::map<std::pair<void*, int>, void*> gpdistmap;
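+  // An LDAHg records its own address here, keyed by (function, gpdist number);
+  // the paired LDAg then takes its low 16 bits relative to that same address,
+  // so the two together rebuild the GOT displacement.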
+
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
+ long idx = 0;
+ bool doCommon = true;
+ switch ((Alpha::RelocationType)MR->getRelocationType()) {
+ default: assert(0 && "Unknown relocation type!");
+ case Alpha::reloc_literal:
+ //This is a LDQl
+ idx = MR->getGOTIndex();
+ DOUT << "Literal relocation to slot " << idx;
+ idx = (idx - GOToffset) * 8;
+ DOUT << " offset " << idx << "\n";
+ break;
+ case Alpha::reloc_gprellow:
+ idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
+ idx = getLower16(idx);
+ DOUT << "gprellow relocation offset " << idx << "\n";
+ DOUT << " Pointer is " << (void*)MR->getResultPointer()
+ << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n";
+ break;
+ case Alpha::reloc_gprelhigh:
+ idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
+ idx = getUpper16(idx);
+ DOUT << "gprelhigh relocation offset " << idx << "\n";
+ DOUT << " Pointer is " << (void*)MR->getResultPointer()
+ << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n";
+ break;
+ case Alpha::reloc_gpdist:
+ switch (*RelocPos >> 26) {
+ case 0x09: //LDAH
+ idx = &GOTBase[GOToffset * 8] - (unsigned char*)RelocPos;
+ idx = getUpper16(idx);
+ DOUT << "LDAH: " << idx << "\n";
+ //add the relocation to the map
+ gpdistmap[std::make_pair(Function, MR->getConstantVal())] = RelocPos;
+ break;
+ case 0x08: //LDA
+ assert(gpdistmap[std::make_pair(Function, MR->getConstantVal())] &&
+ "LDAg without seeing LDAHg");
+ idx = &GOTBase[GOToffset * 8] -
+ (unsigned char*)gpdistmap[std::make_pair(Function, MR->getConstantVal())];
+ idx = getLower16(idx);
+ DOUT << "LDA: " << idx << "\n";
+ break;
+ default:
+ assert(0 && "Cannot handle gpdist yet");
+ }
+ break;
+ case Alpha::reloc_bsr: {
+ idx = (((unsigned char*)MR->getResultPointer() -
+ (unsigned char*)RelocPos) >> 2) + 1; //skip first 2 inst of fun
+ *RelocPos |= (idx & ((1 << 21)-1));
+ doCommon = false;
+ break;
+ }
+ }
+ if (doCommon) {
+ short x = (short)idx;
+ assert(x == idx);
+ *(short*)RelocPos = x;
+ }
+ }
+}
diff --git a/lib/Target/Alpha/AlphaJITInfo.h b/lib/Target/Alpha/AlphaJITInfo.h
new file mode 100644
index 000000000000..edff990dbc24
--- /dev/null
+++ b/lib/Target/Alpha/AlphaJITInfo.h
@@ -0,0 +1,47 @@
+//===- AlphaJITInfo.h - Alpha impl. of the JIT interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_JITINFO_H
+#define ALPHA_JITINFO_H
+
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class TargetMachine;
+
+ class AlphaJITInfo : public TargetJITInfo {
+ protected:
+ TargetMachine &TM;
+ public:
+ explicit AlphaJITInfo(TargetMachine &tm) : TM(tm)
+ { useGOT = true; }
+
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+ private:
+ static const unsigned GOToffset = 4096;
+
+ };
+}
+
+#endif
diff --git a/lib/Target/Alpha/AlphaLLRP.cpp b/lib/Target/Alpha/AlphaLLRP.cpp
new file mode 100644
index 000000000000..0c51bc554be9
--- /dev/null
+++ b/lib/Target/Alpha/AlphaLLRP.cpp
@@ -0,0 +1,158 @@
+//===-- AlphaLLRP.cpp - Alpha Load Load Replay Trap elimination pass. -- --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Here we check for potential replay traps introduced by the spiller
+// We also align some branch targets if we can do so for free.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "alpha-nops"
+#include "Alpha.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+STATISTIC(nopintro, "Number of nops inserted");
+STATISTIC(nopalign, "Number of nops inserted for alignment");
+
+namespace {
+ cl::opt<bool>
+ AlignAll("alpha-align-all", cl::Hidden,
+ cl::desc("Align all blocks"));
+
+ struct AlphaLLRPPass : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ AlphaTargetMachine &TM;
+
+ static char ID;
+ AlphaLLRPPass(AlphaTargetMachine &tm)
+ : MachineFunctionPass(&ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "Alpha NOP inserter";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) {
+ const TargetInstrInfo *TII = F.getTarget().getInstrInfo();
+ bool Changed = false;
+ MachineInstr* prev[3] = {0,0,0};
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ unsigned count = 0;
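+      // count models the position within an aligned four-instruction fetch
+      // block; prev[] remembers the memory ops already seen in the current
+      // block. When a later access uses the same base register and offset,
+      // BIS $31,$31,$31 nops push it into the next block to avoid a likely
+      // replay trap.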
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ MachineBasicBlock& MBB = *FI;
+ bool ub = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ if (count%4 == 0)
+ prev[0] = prev[1] = prev[2] = 0; //Slots cleared at fetch boundary
+ ++count;
+ MachineInstr *MI = I++;
+ switch (MI->getOpcode()) {
+ case Alpha::LDQ: case Alpha::LDL:
+ case Alpha::LDWU: case Alpha::LDBU:
+ case Alpha::LDT: case Alpha::LDS:
+ case Alpha::STQ: case Alpha::STL:
+ case Alpha::STW: case Alpha::STB:
+ case Alpha::STT: case Alpha::STS:
+ if (MI->getOperand(2).getReg() == Alpha::R30) {
+ if (prev[0] &&
+ prev[0]->getOperand(2).getReg() == MI->getOperand(2).getReg()&&
+ prev[0]->getOperand(1).getImm() == MI->getOperand(1).getImm()){
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ Changed = true; nopintro += 1;
+ count += 1;
+ } else if (prev[1]
+ && prev[1]->getOperand(2).getReg() ==
+ MI->getOperand(2).getReg()
+ && prev[1]->getOperand(1).getImm() ==
+ MI->getOperand(1).getImm()) {
+ prev[0] = prev[2];
+ prev[1] = prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31)
+ .addReg(Alpha::R31);
+ Changed = true; nopintro += 2;
+ count += 2;
+ } else if (prev[2]
+ && prev[2]->getOperand(2).getReg() ==
+ MI->getOperand(2).getReg()
+ && prev[2]->getOperand(1).getImm() ==
+ MI->getOperand(1).getImm()) {
+ prev[0] = prev[1] = prev[2] = 0;
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ Changed = true; nopintro += 3;
+ count += 3;
+ }
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = MI;
+ break;
+ }
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ break;
+ case Alpha::ALTENT:
+ case Alpha::MEMLABEL:
+ case Alpha::PCLABEL:
+ --count;
+ break;
+ case Alpha::BR:
+ case Alpha::JMP:
+ ub = true;
+ //fall through
+ default:
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ break;
+ }
+ }
+ if (ub || AlignAll) {
+ //we can align stuff for free at this point
+ while (count % 4) {
+ BuildMI(MBB, MBB.end(), dl, TII->get(Alpha::BISr), Alpha::R31)
+ .addReg(Alpha::R31).addReg(Alpha::R31);
+ ++count;
+ ++nopalign;
+ prev[0] = prev[1];
+ prev[1] = prev[2];
+ prev[2] = 0;
+ }
+ }
+ }
+ return Changed;
+ }
+ };
+ char AlphaLLRPPass::ID = 0;
+} // end of anonymous namespace
+
+FunctionPass *llvm::createAlphaLLRPPass(AlphaTargetMachine &tm) {
+ return new AlphaLLRPPass(tm);
+}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
new file mode 100644
index 000000000000..feee6e467f47
--- /dev/null
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -0,0 +1,335 @@
+//===- AlphaRegisterInfo.cpp - Alpha Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "Alpha.h"
+#include "AlphaRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+using namespace llvm;
+
+//These describe LDAx
+static const int IMM_LOW = -32768;
+static const int IMM_HIGH = 32767;
+static const int IMM_MULT = 65536;
+
+static long getUpper16(long l)
+{
+ long y = l / IMM_MULT;
+ if (l % IMM_MULT > IMM_HIGH)
+ ++y;
+ return y;
+}
+
+static long getLower16(long l)
+{
+ long h = getUpper16(l);
+ return l - h * IMM_MULT;
+}
+
+AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
+ : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
+ TII(tii)
+{
+}
+
+const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ Alpha::R9, Alpha::R10,
+ Alpha::R11, Alpha::R12,
+ Alpha::R13, Alpha::R14,
+ Alpha::F2, Alpha::F3,
+ Alpha::F4, Alpha::F5,
+ Alpha::F6, Alpha::F7,
+ Alpha::F8, Alpha::F9, 0
+ };
+ return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+AlphaRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+ &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+ &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+ &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
+ &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
+ &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
+ &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, 0
+ };
+ return CalleeSavedRegClasses;
+}
+
+BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(Alpha::R15);
+ Reserved.set(Alpha::R30);
+ Reserved.set(Alpha::R31);
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. For Alpha this is true only if the function has variable
+// sized allocas.
+//
+bool AlphaRegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasVarSizedObjects();
+}
+
+void AlphaRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (hasFP(MF)) {
+ // If we have a frame pointer, turn the adjcallstackup instruction into a
+ // 'sub ESP, <amt>' and the adjcallstackdown instruction into 'add ESP,
+ // <amt>'
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
+
+ MachineInstr *New;
+ if (Old->getOpcode() == Alpha::ADJUSTSTACKDOWN) {
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(-Amount).addReg(Alpha::R30);
+ } else {
+ assert(Old->getOpcode() == Alpha::ADJUSTSTACKUP);
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(Amount).addReg(Alpha::R30);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+//Alpha has a slightly funny stack:
+//Args
+//<- incoming SP
+//fixed locals (and spills, callee saved, etc)
+//<- FP
+//variable locals
+//<- SP
+
+void AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ bool FP = hasFP(MF);
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Add the base register of R30 (SP) or R15 (FP).
+ MI.getOperand(i + 1).ChangeToRegister(FP ? Alpha::R15 : Alpha::R30, false);
+
+ // Now add the frame object offset to the offset from the virtual frame index.
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ DOUT << "FI: " << FrameIndex << " Offset: " << Offset << "\n";
+
+ Offset += MF.getFrameInfo()->getStackSize();
+
+ DOUT << "Corrected Offset " << Offset
+ << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n";
+
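+  // Offsets outside the signed 16-bit displacement range are split with
+  // getUpper16/getLower16: the high half goes through a scratch LDAH into
+  // R28 (the assembler temporary) and the original instruction keeps the
+  // low half.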
+ if (Offset > IMM_HIGH || Offset < IMM_LOW) {
+ DOUT << "Unconditionally using R28 for evil purposes Offset: "
+ << Offset << "\n";
+ //so in this case, we need to use a temporary register, and move the
+ //original inst off the SP/FP
+ //fix up the old:
+ MI.getOperand(i + 1).ChangeToRegister(Alpha::R28, false);
+ MI.getOperand(i).ChangeToImmediate(getLower16(Offset));
+ //insert the new
+ MachineInstr* nMI=BuildMI(MF, MI.getDebugLoc(),
+ TII.get(Alpha::LDAH), Alpha::R28)
+ .addImm(getUpper16(Offset)).addReg(FP ? Alpha::R15 : Alpha::R30);
+ MBB.insert(II, nMI);
+ } else {
+ MI.getOperand(i).ChangeToImmediate(Offset);
+ }
+}
+
+
+void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+ bool FP = hasFP(MF);
+
+ static int curgpdist = 0;
+
+  //Handle the GP setup: recompute $29 from $27 via a gpdist-paired LDAH/LDA.
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
+ .addGlobalAddress(const_cast<Function*>(MF.getFunction()))
+ .addReg(Alpha::R27).addImm(++curgpdist);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29)
+ .addGlobalAddress(const_cast<Function*>(MF.getFunction()))
+ .addReg(Alpha::R29).addImm(curgpdist);
+
+ //evil const_cast until MO stuff setup to handle const
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT))
+ .addGlobalAddress(const_cast<Function*>(MF.getFunction()));
+
+ // Get the number of bytes to allocate from the FrameInfo
+ long NumBytes = MFI->getStackSize();
+
+ if (FP)
+ NumBytes += 8; //reserve space for the old FP
+
+ // Do we need to allocate space on the stack?
+ if (NumBytes == 0) return;
+
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ NumBytes = (NumBytes+Align-1)/Align*Align;
+
+ // Update frame info to pretend that this is part of the stack...
+ MFI->setStackSize(NumBytes);
+
+ // adjust stack pointer: r30 -= numbytes
+ NumBytes = -NumBytes;
+ if (NumBytes >= IMM_LOW) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) >= IMM_LOW) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+ .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+ } else {
+ cerr << "Too big a stack frame at " << NumBytes << "\n";
+ abort();
+ }
+
+ //now if we need to, save the old FP and set the new
+ if (FP)
+ {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ))
+ .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30);
+ //this must be the last instr in the prolog
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15)
+ .addReg(Alpha::R30).addReg(Alpha::R30);
+ }
+
+}
+
+void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ assert((MBBI->getOpcode() == Alpha::RETDAG ||
+ MBBI->getOpcode() == Alpha::RETDAGp)
+ && "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ bool FP = hasFP(MF);
+
+ // Get the number of bytes allocated from the FrameInfo...
+ long NumBytes = MFI->getStackSize();
+
+ //now if we need to, restore the old FP
+ if (FP) {
+ //copy the FP into the SP (discards allocas)
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15)
+ .addReg(Alpha::R15);
+ //restore the FP
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15)
+ .addImm(0).addReg(Alpha::R15);
+ }
+
+ if (NumBytes != 0) {
+ if (NumBytes <= IMM_HIGH) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
+ .addReg(Alpha::R30);
+ } else if (getUpper16(NumBytes) <= IMM_HIGH) {
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
+ .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
+ BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
+ .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
+ } else {
+ cerr << "Too big a stack frame at " << NumBytes << "\n";
+ abort();
+ }
+ }
+}
+
+unsigned AlphaRegisterInfo::getRARegister() const {
+ assert(0 && "What is the return address register");
+ return 0;
+}
+
+unsigned AlphaRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? Alpha::R15 : Alpha::R30;
+}
+
+unsigned AlphaRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned AlphaRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int AlphaRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "What is the dwarf register number");
+ return -1;
+}
+
+#include "AlphaGenRegisterInfo.inc"
+
+std::string AlphaRegisterInfo::getPrettyName(unsigned reg)
+{
+ std::string s(RegisterDescriptors[reg].Name);
+ return s;
+}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
new file mode 100644
index 000000000000..c4f5f7b421dd
--- /dev/null
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -0,0 +1,67 @@
+//===- AlphaRegisterInfo.h - Alpha Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Alpha implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAREGISTERINFO_H
+#define ALPHAREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "AlphaGenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class Type;
+
+struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
+ const TargetInstrInfo &TII;
+
+ AlphaRegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ //void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+ static std::string getPrettyName(unsigned reg);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.td b/lib/Target/Alpha/AlphaRegisterInfo.td
new file mode 100644
index 000000000000..35e6804ea6ac
--- /dev/null
+++ b/lib/Target/Alpha/AlphaRegisterInfo.td
@@ -0,0 +1,171 @@
+//===- AlphaRegisterInfo.td - The Alpha Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Alpha register set.
+//
+//===----------------------------------------------------------------------===//
+
+class AlphaReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Alpha";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 64-bit general-purpose registers
+class GPR<bits<5> num, string n> : AlphaReg<n> {
+ let Num = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : AlphaReg<n> {
+ let Num = num;
+}
+
+//#define FP $15
+//#define RA $26
+//#define PV $27
+//#define GP $29
+//#define SP $30
+
+// General-purpose registers
+def R0 : GPR< 0, "$0">, DwarfRegNum<[0]>;
+def R1 : GPR< 1, "$1">, DwarfRegNum<[1]>;
+def R2 : GPR< 2, "$2">, DwarfRegNum<[2]>;
+def R3 : GPR< 3, "$3">, DwarfRegNum<[3]>;
+def R4 : GPR< 4, "$4">, DwarfRegNum<[4]>;
+def R5 : GPR< 5, "$5">, DwarfRegNum<[5]>;
+def R6 : GPR< 6, "$6">, DwarfRegNum<[6]>;
+def R7 : GPR< 7, "$7">, DwarfRegNum<[7]>;
+def R8 : GPR< 8, "$8">, DwarfRegNum<[8]>;
+def R9 : GPR< 9, "$9">, DwarfRegNum<[9]>;
+def R10 : GPR<10, "$10">, DwarfRegNum<[10]>;
+def R11 : GPR<11, "$11">, DwarfRegNum<[11]>;
+def R12 : GPR<12, "$12">, DwarfRegNum<[12]>;
+def R13 : GPR<13, "$13">, DwarfRegNum<[13]>;
+def R14 : GPR<14, "$14">, DwarfRegNum<[14]>;
+def R15 : GPR<15, "$15">, DwarfRegNum<[15]>;
+def R16 : GPR<16, "$16">, DwarfRegNum<[16]>;
+def R17 : GPR<17, "$17">, DwarfRegNum<[17]>;
+def R18 : GPR<18, "$18">, DwarfRegNum<[18]>;
+def R19 : GPR<19, "$19">, DwarfRegNum<[19]>;
+def R20 : GPR<20, "$20">, DwarfRegNum<[20]>;
+def R21 : GPR<21, "$21">, DwarfRegNum<[21]>;
+def R22 : GPR<22, "$22">, DwarfRegNum<[22]>;
+def R23 : GPR<23, "$23">, DwarfRegNum<[23]>;
+def R24 : GPR<24, "$24">, DwarfRegNum<[24]>;
+def R25 : GPR<25, "$25">, DwarfRegNum<[25]>;
+def R26 : GPR<26, "$26">, DwarfRegNum<[26]>;
+def R27 : GPR<27, "$27">, DwarfRegNum<[27]>;
+def R28 : GPR<28, "$28">, DwarfRegNum<[28]>;
+def R29 : GPR<29, "$29">, DwarfRegNum<[29]>;
+def R30 : GPR<30, "$30">, DwarfRegNum<[30]>;
+def R31 : GPR<31, "$31">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0 : FPR< 0, "$f0">, DwarfRegNum<[33]>;
+def F1 : FPR< 1, "$f1">, DwarfRegNum<[34]>;
+def F2 : FPR< 2, "$f2">, DwarfRegNum<[35]>;
+def F3 : FPR< 3, "$f3">, DwarfRegNum<[36]>;
+def F4 : FPR< 4, "$f4">, DwarfRegNum<[37]>;
+def F5 : FPR< 5, "$f5">, DwarfRegNum<[38]>;
+def F6 : FPR< 6, "$f6">, DwarfRegNum<[39]>;
+def F7 : FPR< 7, "$f7">, DwarfRegNum<[40]>;
+def F8 : FPR< 8, "$f8">, DwarfRegNum<[41]>;
+def F9 : FPR< 9, "$f9">, DwarfRegNum<[42]>;
+def F10 : FPR<10, "$f10">, DwarfRegNum<[43]>;
+def F11 : FPR<11, "$f11">, DwarfRegNum<[44]>;
+def F12 : FPR<12, "$f12">, DwarfRegNum<[45]>;
+def F13 : FPR<13, "$f13">, DwarfRegNum<[46]>;
+def F14 : FPR<14, "$f14">, DwarfRegNum<[47]>;
+def F15 : FPR<15, "$f15">, DwarfRegNum<[48]>;
+def F16 : FPR<16, "$f16">, DwarfRegNum<[49]>;
+def F17 : FPR<17, "$f17">, DwarfRegNum<[50]>;
+def F18 : FPR<18, "$f18">, DwarfRegNum<[51]>;
+def F19 : FPR<19, "$f19">, DwarfRegNum<[52]>;
+def F20 : FPR<20, "$f20">, DwarfRegNum<[53]>;
+def F21 : FPR<21, "$f21">, DwarfRegNum<[54]>;
+def F22 : FPR<22, "$f22">, DwarfRegNum<[55]>;
+def F23 : FPR<23, "$f23">, DwarfRegNum<[56]>;
+def F24 : FPR<24, "$f24">, DwarfRegNum<[57]>;
+def F25 : FPR<25, "$f25">, DwarfRegNum<[58]>;
+def F26 : FPR<26, "$f26">, DwarfRegNum<[59]>;
+def F27 : FPR<27, "$f27">, DwarfRegNum<[60]>;
+def F28 : FPR<28, "$f28">, DwarfRegNum<[61]>;
+def F29 : FPR<29, "$f29">, DwarfRegNum<[62]>;
+def F30 : FPR<30, "$f30">, DwarfRegNum<[63]>;
+def F31 : FPR<31, "$f31">, DwarfRegNum<[64]>;
+
+// Register aliases (repeated here for the register class definitions below):
+//   FP = $15, RA = $26, PV = $27, GP = $29, SP = $30
+// $28 is undefined after any and all calls.
+
+/// Register classes
+def GPRC : RegisterClass<"Alpha", [i64], 64,
+ // Volatile
+ [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22,
+ R23, R24, R25, R28,
+ //Special meaning, but volatile
+ R27, //procedure address
+ R26, //return address
+ R29, //global offset table address
+ // Non-volatile
+ R9, R10, R11, R12, R13, R14,
+// Don't allocate 15, 30, 31
+ R15, R30, R31 ]> //zero
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GPRCClass::iterator
+ GPRCClass::allocation_order_end(const MachineFunction &MF) const {
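+        // The last three registers in this class are R15 (the frame pointer),
+        // R30 (the stack pointer) and R31 (the zero register); stopping three
+        // short of end() keeps them out of the allocator's hands.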
+ return end()-3;
+ }
+ }];
+}
+
+def F4RC : RegisterClass<"Alpha", [f32], 64, [F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
+ // Saved:
+ F2, F3, F4, F5, F6, F7, F8, F9,
+ F31 ]> //zero
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ F4RCClass::iterator
+ F4RCClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-1;
+ }
+ }];
+}
+
+def F8RC : RegisterClass<"Alpha", [f64], 64, [F0, F1,
+ F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
+ // Saved:
+ F2, F3, F4, F5, F6, F7, F8, F9,
+ F31 ]> //zero
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ F8RCClass::iterator
+ F8RCClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-1;
+ }
+ }];
+}
diff --git a/lib/Target/Alpha/AlphaRelocations.h b/lib/Target/Alpha/AlphaRelocations.h
new file mode 100644
index 000000000000..4c92045d4696
--- /dev/null
+++ b/lib/Target/Alpha/AlphaRelocations.h
@@ -0,0 +1,31 @@
+//===- AlphaRelocations.h - Alpha Code Relocations --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Alpha target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHARELOCATIONS_H
+#define ALPHARELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace Alpha {
+ enum RelocationType {
+ reloc_literal,
+ reloc_gprellow,
+ reloc_gprelhigh,
+ reloc_gpdist,
+ reloc_bsr
+ };
+ }
+}
+
+#endif
diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td
new file mode 100644
index 000000000000..b7b456084709
--- /dev/null
+++ b/lib/Target/Alpha/AlphaSchedule.td
@@ -0,0 +1,84 @@
+//===- AlphaSchedule.td - Alpha Scheduling Definitions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// This is Table 2-2 from the 21264 Compiler Writer's Guide, with some
+// modifications.
+
+//Pipelines
+
+def L0 : FuncUnit;
+def L1 : FuncUnit;
+def FST0 : FuncUnit;
+def FST1 : FuncUnit;
+def U0 : FuncUnit;
+def U1 : FuncUnit;
+def FA : FuncUnit;
+def FM : FuncUnit;
+
+def s_ild : InstrItinClass;
+def s_fld : InstrItinClass;
+def s_ist : InstrItinClass;
+def s_fst : InstrItinClass;
+def s_lda : InstrItinClass;
+def s_rpcc : InstrItinClass;
+def s_rx : InstrItinClass;
+def s_mxpr : InstrItinClass;
+def s_icbr : InstrItinClass;
+def s_ubr : InstrItinClass;
+def s_jsr : InstrItinClass;
+def s_iadd : InstrItinClass;
+def s_ilog : InstrItinClass;
+def s_ishf : InstrItinClass;
+def s_cmov : InstrItinClass;
+def s_imul : InstrItinClass;
+def s_imisc : InstrItinClass;
+def s_fbr : InstrItinClass;
+def s_fadd : InstrItinClass;
+def s_fmul : InstrItinClass;
+def s_fcmov : InstrItinClass;
+def s_fdivt : InstrItinClass;
+def s_fdivs : InstrItinClass;
+def s_fsqrts: InstrItinClass;
+def s_fsqrtt: InstrItinClass;
+def s_ftoi : InstrItinClass;
+def s_itof : InstrItinClass;
+def s_pseudo : InstrItinClass;
+
+// Table 2-4: Instruction Class Latency in Cycles, with some modifications.
+
+def Alpha21264Itineraries : ProcessorItineraries<[
+ InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>,
+ InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>,
+ InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>,
+ InstrItinData<s_fst , [InstrStage<0, [FST0, FST1, L0, L1]>]>,
+ InstrItinData<s_lda , [InstrStage<1, [L0, L1, U0, U1]>]>,
+ InstrItinData<s_rpcc , [InstrStage<1, [L1]>]>,
+ InstrItinData<s_rx , [InstrStage<1, [L1]>]>,
+ InstrItinData<s_mxpr , [InstrStage<1, [L0, L1]>]>,
+ InstrItinData<s_icbr , [InstrStage<0, [U0, U1]>]>,
+ InstrItinData<s_ubr , [InstrStage<3, [U0, U1]>]>,
+ InstrItinData<s_jsr , [InstrStage<3, [L0]>]>,
+ InstrItinData<s_iadd , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_ilog , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_ishf , [InstrStage<1, [U0, U1]>]>,
+ InstrItinData<s_cmov , [InstrStage<1, [L0, U0, L1, U1]>]>,
+ InstrItinData<s_imul , [InstrStage<7, [U1]>]>,
+ InstrItinData<s_imisc , [InstrStage<3, [U0]>]>,
+ InstrItinData<s_fbr , [InstrStage<0, [FA]>]>,
+ InstrItinData<s_fadd , [InstrStage<6, [FA]>]>,
+ InstrItinData<s_fmul , [InstrStage<6, [FM]>]>,
+ InstrItinData<s_fcmov , [InstrStage<6, [FA]>]>,
+ InstrItinData<s_fdivs , [InstrStage<12, [FA]>]>,
+ InstrItinData<s_fdivt , [InstrStage<15, [FA]>]>,
+ InstrItinData<s_fsqrts , [InstrStage<18, [FA]>]>,
+ InstrItinData<s_fsqrtt , [InstrStage<33, [FA]>]>,
+ InstrItinData<s_ftoi , [InstrStage<3, [FST0, FST1, L0, L1]>]>,
+ InstrItinData<s_itof , [InstrStage<4, [L0, L1]>]>
+]>;
diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp
new file mode 100644
index 000000000000..d5a9365d75c1
--- /dev/null
+++ b/lib/Target/Alpha/AlphaSubtarget.cpp
@@ -0,0 +1,25 @@
+//===- AlphaSubtarget.cpp - Alpha Subtarget Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Alpha specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaSubtarget.h"
+#include "Alpha.h"
+#include "AlphaGenSubtarget.inc"
+using namespace llvm;
+
+AlphaSubtarget::AlphaSubtarget(const Module &M, const std::string &FS)
+ : HasCT(false) {
+ std::string CPU = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/Alpha/AlphaSubtarget.h b/lib/Target/Alpha/AlphaSubtarget.h
new file mode 100644
index 000000000000..0a944cb0a634
--- /dev/null
+++ b/lib/Target/Alpha/AlphaSubtarget.h
@@ -0,0 +1,47 @@
+//=====-- AlphaSubtarget.h - Define Subtarget for the Alpha --*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Alpha specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHASUBTARGET_H
+#define ALPHASUBTARGET_H
+
+#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+class AlphaSubtarget : public TargetSubtarget {
+protected:
+
+ bool HasCT;
+
+ InstrItineraryData InstrItins;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ AlphaSubtarget(const Module &M, const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ bool hasCT() const { return HasCT; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Alpha/AlphaTargetAsmInfo.cpp b/lib/Target/Alpha/AlphaTargetAsmInfo.cpp
new file mode 100644
index 000000000000..6092ab67b5fc
--- /dev/null
+++ b/lib/Target/Alpha/AlphaTargetAsmInfo.cpp
@@ -0,0 +1,31 @@
+//===-- AlphaTargetAsmInfo.cpp - Alpha asm properties -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the AlphaTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaTargetMachine.h"
+#include "AlphaTargetAsmInfo.h"
+
+using namespace llvm;
+
+AlphaTargetAsmInfo::AlphaTargetAsmInfo(const AlphaTargetMachine &TM)
+ : TargetAsmInfo(TM) {
+ AlignmentIsInBytes = false;
+ PrivateGlobalPrefix = "$";
+ JumpTableDirective = ".gprel32";
+ JumpTableDataSection = "\t.section .rodata\n";
+ WeakRefDirective = "\t.weak\t";
+}
+
+unsigned AlphaTargetAsmInfo::RelocBehaviour() const {
+ return (TM.getRelocationModel() != Reloc::Static ?
+ Reloc::LocalOrGlobal : Reloc::Global);
+}
diff --git a/lib/Target/Alpha/AlphaTargetAsmInfo.h b/lib/Target/Alpha/AlphaTargetAsmInfo.h
new file mode 100644
index 000000000000..7675b26f6e81
--- /dev/null
+++ b/lib/Target/Alpha/AlphaTargetAsmInfo.h
@@ -0,0 +1,32 @@
+//=====-- AlphaTargetAsmInfo.h - Alpha asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the AlphaTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHATARGETASMINFO_H
+#define ALPHATARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class AlphaTargetMachine;
+
+ struct AlphaTargetAsmInfo : public TargetAsmInfo {
+ explicit AlphaTargetAsmInfo(const AlphaTargetMachine &TM);
+
+ virtual unsigned RelocBehaviour() const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
new file mode 100644
index 000000000000..4c830541f16a
--- /dev/null
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -0,0 +1,126 @@
+//===-- AlphaTargetMachine.cpp - Define TargetMachine for Alpha -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "AlphaJITInfo.h"
+#include "AlphaTargetAsmInfo.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+/// AlphaTargetMachineModule - Note that this is used on hosts that cannot link
+/// in a library unless there are references into the library. In particular,
+/// it seems that it is not possible to get things to work on Win32 without
+/// this. Though it is unused, do not remove it.
+extern "C" int AlphaTargetMachineModule;
+int AlphaTargetMachineModule = 0;
+
+// Register the targets
+static RegisterTarget<AlphaTargetMachine> X("alpha", "Alpha [experimental]");
+
+const TargetAsmInfo *AlphaTargetMachine::createTargetAsmInfo() const {
+ return new AlphaTargetAsmInfo(*this);
+}
+
+unsigned AlphaTargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "alpha*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 5 && TT[0] == 'a' && TT[1] == 'l' && TT[2] == 'p' &&
+ TT[3] == 'h' && TT[4] == 'a')
+ return 20;
+ // If the target triple is something non-alpha, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer64)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+unsigned AlphaTargetMachine::getJITMatchQuality() {
+#ifdef __alpha
+ return 10;
+#else
+ return 0;
+#endif
+}
+
+AlphaTargetMachine::AlphaTargetMachine(const Module &M, const std::string &FS)
+ : DataLayout("e-f128:128:128"),
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
+ JITInfo(*this),
+ Subtarget(M, FS),
+ TLInfo(*this) {
+ setRelocationModel(Reloc::PIC_);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool AlphaTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createAlphaISelDag(*this));
+ return false;
+}
+bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Must run branch selection immediately preceding the asm printer
+ PM.add(createAlphaBranchSelectionPass());
+ return false;
+}
+bool AlphaTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ PM.add(createAlphaLLRPPass(*this));
+ PM.add(createAlphaCodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
+bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE) {
+ PM.add(createAlphaCodeEmitterPass(*this, MCE));
+ if (DumpAsm)
+ PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true));
+ return false;
+}
+bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE) {
+ PM.add(createAlphaJITCodeEmitterPass(*this, JCE));
+ if (DumpAsm)
+ PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true));
+ return false;
+}
+bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE) {
+ return addCodeEmitter(PM, OptLevel, DumpAsm, MCE);
+}
+bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE) {
+ return addCodeEmitter(PM, OptLevel, DumpAsm, JCE);
+}
+
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
new file mode 100644
index 000000000000..51224e80de70
--- /dev/null
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -0,0 +1,82 @@
+//===-- AlphaTargetMachine.h - Define TargetMachine for Alpha ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Alpha-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHA_TARGETMACHINE_H
+#define ALPHA_TARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaJITInfo.h"
+#include "AlphaISelLowering.h"
+#include "AlphaSubtarget.h"
+
+namespace llvm {
+
+class GlobalValue;
+
+class AlphaTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ AlphaInstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+ AlphaJITInfo JITInfo;
+ AlphaSubtarget Subtarget;
+ AlphaTargetLowering TLInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ AlphaTargetMachine(const Module &M, const std::string &FS);
+
+ virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const AlphaSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const AlphaRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual AlphaTargetLowering* getTargetLowering() const {
+ return const_cast<AlphaTargetLowering*>(&TLInfo);
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual AlphaJITInfo* getJITInfo() {
+ return &JITInfo;
+ }
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
new file mode 100644
index 000000000000..74b48ee66235
--- /dev/null
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -0,0 +1,305 @@
+//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format Alpha assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Alpha.h"
+#include "AlphaInstrInfo.h"
+#include "AlphaTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ struct VISIBILITY_HIDDEN AlphaAsmPrinter : public AsmPrinter {
+
+ explicit AlphaAsmPrinter(raw_ostream &o, TargetMachine &tm,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(o, tm, T, OL, V) {}
+
+ virtual const char *getPassName() const {
+ return "Alpha Assembly Printer";
+ }
+ bool printInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO, bool IsCallOp = false);
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printBaseOffsetPair (const MachineInstr *MI, int i, bool brackets=true);
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode);
+ };
+} // end of anonymous namespace
+
+/// createAlphaCodePrinterPass - Returns a pass that prints the Alpha
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createAlphaCodePrinterPass(raw_ostream &o,
+ TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new AlphaAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+#include "AlphaGenAsmWriter.inc"
+
+void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum)
+{
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.getType() == MachineOperand::MO_Register) {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Not physreg??");
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ } else if (MO.isImm()) {
+ O << MO.getImm();
+ assert(MO.getImm() < (1 << 30));
+ } else {
+ printOp(MO);
+ }
+}
+
+
+void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) {
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << RI.get(MO.getReg()).AsmName;
+ return;
+
+ case MachineOperand::MO_Immediate:
+ cerr << "printOp() does not handle immediate values\n";
+ abort();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ return;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ return;
+
+ case MachineOperand::MO_GlobalAddress: {
+ GlobalValue *GV = MO.getGlobal();
+ O << Mang->getValueName(GV);
+ if (GV->isDeclaration() && GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ return;
+ }
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// runOnMachineFunction - This uses the printInstruction() method to print
+/// assembly for each instruction.
+///
+bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out jump tables referenced by the function
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ EmitAlignment(4, F);
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ case Function::PrivateLinkage:
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.globl " << CurrentFnName << "\n";
+ break;
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ O << TAI->getWeakRefDirective() << CurrentFnName << "\n";
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ O << "\t.ent " << CurrentFnName << "\n";
+
+ O << CurrentFnName << ":\n";
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ ++EmittedInsts;
+ if (!printInstruction(II)) {
+ assert(0 && "Unhandled instruction in asm writer!");
+ abort();
+ }
+ }
+ }
+
+ O << "\t.end " << CurrentFnName << "\n";
+
+ // We didn't modify anything.
+ return false;
+}
+
+bool AlphaAsmPrinter::doInitialization(Module &M)
+{
+ if(TM.getSubtarget<AlphaSubtarget>().hasCT())
+ O << "\t.arch ev6\n"; //This might need to be ev67, so leave this test here
+ else
+ O << "\t.arch ev6\n";
+ O << "\t.set noat\n";
+ return AsmPrinter::doInitialization(M);
+}
+
+void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+  if (!GVar->hasInitializer()) return;   // External globals require no code.
+
+  // Check to see if this is a special global used by LLVM; if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ std::string name = Mang->getValueName(GVar);
+ Constant *C = GVar->getInitializer();
+ unsigned Size = TD->getTypeAllocSize(C->getType());
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ // 0: Switch to section
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ // 1: Check visibility
+ printVisibility(name, GVar->getVisibility());
+
+ // 2: Kind
+ switch (GVar->getLinkage()) {
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::CommonLinkage:
+ O << TAI->getWeakRefDirective() << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+ case GlobalValue::ExternalLinkage:
+ O << TAI->getGlobalDirective() << name << "\n";
+ break;
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ default:
+ assert(0 && "Unknown linkage type!");
+ cerr << "Unknown linkage type!\n";
+ abort();
+ }
+
+ // 3: Type, Size, Align
+ if (TAI->hasDotTypeDotSizeDirective()) {
+ O << "\t.type\t" << name << ", @object\n";
+ O << "\t.size\t" << name << ", " << Size << "\n";
+ }
+
+ EmitAlignment(Align, GVar);
+
+ O << name << ":\n";
+
+  // If the initializer is an extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+}
+
+bool AlphaAsmPrinter::doFinalization(Module &M) {
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ O << "0(";
+ printOperand(MI, OpNo);
+ O << ")";
+ return false;
+}
diff --git a/lib/Target/Alpha/AsmPrinter/CMakeLists.txt b/lib/Target/Alpha/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..b62a7f683568
--- /dev/null
+++ b/lib/Target/Alpha/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,9 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_partially_linked_object(LLVMAlphaAsmPrinter
+ AlphaAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMAlphaCodeGen n)
+
+add_dependencies(LLVMAlphaAsmPrinter ${n})
diff --git a/lib/Target/Alpha/AsmPrinter/Makefile b/lib/Target/Alpha/AsmPrinter/Makefile
new file mode 100644
index 000000000000..c5b3e946695b
--- /dev/null
+++ b/lib/Target/Alpha/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Alpha/AsmPrinter/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAlphaAsmPrinter
+
+# Hack: we need to include the 'main' Alpha target directory to grab private headers.
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt
new file mode 100644
index 000000000000..1e535f7769a3
--- /dev/null
+++ b/lib/Target/Alpha/CMakeLists.txt
@@ -0,0 +1,25 @@
+set(LLVM_TARGET_DEFINITIONS Alpha.td)
+
+tablegen(AlphaGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(AlphaGenRegisterNames.inc -gen-register-enums)
+tablegen(AlphaGenRegisterInfo.inc -gen-register-desc)
+tablegen(AlphaGenInstrNames.inc -gen-instr-enums)
+tablegen(AlphaGenInstrInfo.inc -gen-instr-desc)
+tablegen(AlphaGenCodeEmitter.inc -gen-emitter)
+tablegen(AlphaGenAsmWriter.inc -gen-asm-writer)
+tablegen(AlphaGenDAGISel.inc -gen-dag-isel)
+tablegen(AlphaGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(AlphaCodeGen
+ AlphaBranchSelector.cpp
+ AlphaCodeEmitter.cpp
+ AlphaInstrInfo.cpp
+ AlphaISelDAGToDAG.cpp
+ AlphaISelLowering.cpp
+ AlphaJITInfo.cpp
+ AlphaLLRP.cpp
+ AlphaRegisterInfo.cpp
+ AlphaSubtarget.cpp
+ AlphaTargetAsmInfo.cpp
+ AlphaTargetMachine.cpp
+ )
diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile
new file mode 100644
index 000000000000..d6c82c7d7435
--- /dev/null
+++ b/lib/Target/Alpha/Makefile
@@ -0,0 +1,22 @@
+##===- lib/Target/Alpha/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMAlphaCodeGen
+TARGET = Alpha
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \
+ AlphaGenRegisterInfo.inc AlphaGenInstrNames.inc \
+ AlphaGenInstrInfo.inc AlphaGenCodeEmitter.inc \
+ AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \
+ AlphaGenSubtarget.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Alpha/README.txt b/lib/Target/Alpha/README.txt
new file mode 100644
index 000000000000..9ae15174c582
--- /dev/null
+++ b/lib/Target/Alpha/README.txt
@@ -0,0 +1,42 @@
+***
+
+add gcc builtins for alpha instructions
+
+
+***
+
+custom expand byteswap into nifty
+extract/insert/mask byte/word/longword/quadword low/high
+sequences
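+
+As a rough reference point (not the Alpha sequence itself), the generic
+shift-and-mask form such an expansion would replace looks like:
+
+  // portable 64-bit byteswap; the idea above is to do this with the
+  // extract/insert/mask byte-manipulation instructions instead
+  static inline unsigned long long bswap64(unsigned long long v) {
+    v = ((v & 0x00FF00FF00FF00FFULL) << 8)  | ((v >> 8)  & 0x00FF00FF00FF00FFULL);
+    v = ((v & 0x0000FFFF0000FFFFULL) << 16) | ((v >> 16) & 0x0000FFFF0000FFFFULL);
+    return (v << 32) | (v >> 32);
+  }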
+
+***
+
+see if any of the extract/insert/mask operations can be added
+
+***
+
+match more interesting things for cmovlbc cmovlbs (move if low bit clear/set)
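+
+The source-level shape in question (illustrative C only, not a pattern that
+exists in the .td files yet):
+
+  r = (x & 1) ? a : b;   /* candidate for cmovlbs (low bit set)   */
+  r = (x & 1) ? b : a;   /* candidate for cmovlbc (low bit clear) */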
+
+***
+
+lower srem and urem
+
+remq(i,j): i - (j * divq(i,j)) if j != 0
+remqu(i,j): i - (j * divqu(i,j)) if j != 0
+reml(i,j): i - (j * divl(i,j)) if j != 0
+remlu(i,j): i - (j * divlu(i,j)) if j != 0
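+
+In other words, once the divide is available the remainder is just a
+multiply-subtract around it; as a sketch in C (illustrative only):
+
+  long remq(long i, long j) {      /* assumes j != 0, as noted above */
+    return i - (j * (i / j));      /* i / j stands in for divq(i,j)  */
+  }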
+
+***
+
+add crazy vector instructions (MVI):
+
+(MIN|MAX)(U|S)(B8|W4) min and max, signed and unsigned, byte and word
+PKWB, UNPKBW pack/unpack word to byte
+PKLB UNPKBL pack/unpack long to byte
+PERR pixel error (sum across bytes of bytewise abs(i8v8 a - i8v8 b))
+
+cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extensions)
+
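+As a scalar reference for PERR's meaning as described above (illustrative C,
+treating each i8v8 operand as eight bytes):
+
+  unsigned perr(const unsigned char a[8], const unsigned char b[8]) {
+    unsigned sum = 0;                 /* sum of absolute byte differences */
+    for (int i = 0; i != 8; ++i)
+      sum += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
+    return sum;
+  }
+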
+This page has some good examples of other operations that can be synthesised
+well from these rather meager vector ops (such as saturating add):
+http://www.alphalinux.org/docs/MVI-full.html
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
new file mode 100644
index 000000000000..4d7b54503e89
--- /dev/null
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -0,0 +1,3601 @@
+//===-- CBackend.cpp - Library for converting LLVM code to C --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library converts LLVM code to C code, compilable by GCC and other C
+// compilers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Analysis/ConstantsScanner.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Config/config.h"
+#include <algorithm>
+#include <sstream>
+using namespace llvm;
+
+/// CBackendTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int CBackendTargetMachineModule;
+int CBackendTargetMachineModule = 0;
+
+// Register the target.
+static RegisterTarget<CTargetMachine> X("c", "C backend");
+
+namespace {
+ /// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for
+ /// any unnamed structure types that are used by the program, and merges
+ /// external functions with the same name.
+ ///
+ class CBackendNameAllUsedStructsAndMergeFunctions : public ModulePass {
+ public:
+ static char ID;
+ CBackendNameAllUsedStructsAndMergeFunctions()
+ : ModulePass(&ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<FindUsedTypes>();
+ }
+
+ virtual const char *getPassName() const {
+ return "C backend type canonicalizer";
+ }
+
+ virtual bool runOnModule(Module &M);
+ };
+
+ char CBackendNameAllUsedStructsAndMergeFunctions::ID = 0;
+
+ /// CWriter - This class is the main chunk of code that converts an LLVM
+ /// module to a C translation unit.
+ class CWriter : public FunctionPass, public InstVisitor<CWriter> {
+ raw_ostream &Out;
+ IntrinsicLowering *IL;
+ Mangler *Mang;
+ LoopInfo *LI;
+ const Module *TheModule;
+ const TargetAsmInfo* TAsm;
+ const TargetData* TD;
+ std::map<const Type *, std::string> TypeNames;
+ std::map<const ConstantFP *, unsigned> FPConstantMap;
+ std::set<Function*> intrinsicPrototypesAlreadyGenerated;
+ std::set<const Argument*> ByValParams;
+ unsigned FPCounter;
+
+ public:
+ static char ID;
+ explicit CWriter(raw_ostream &o)
+ : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0),
+ TheModule(0), TAsm(0), TD(0) {
+ FPCounter = 0;
+ }
+
+ virtual const char *getPassName() const { return "C backend"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+ }
+
+ virtual bool doInitialization(Module &M);
+
+ bool runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ LI = &getAnalysis<LoopInfo>();
+
+ // Get rid of intrinsics we can't handle.
+ lowerIntrinsics(F);
+
+ // Output all floating point constants that cannot be printed accurately.
+ printFloatingPointConstants(F);
+
+ printFunction(F);
+ return false;
+ }
+
+ virtual bool doFinalization(Module &M) {
+ // Free memory...
+ delete IL;
+ delete TD;
+ delete Mang;
+ FPConstantMap.clear();
+ TypeNames.clear();
+ ByValParams.clear();
+ intrinsicPrototypesAlreadyGenerated.clear();
+ return false;
+ }
+
+ raw_ostream &printType(raw_ostream &Out, const Type *Ty,
+ bool isSigned = false,
+ const std::string &VariableName = "",
+ bool IgnoreName = false,
+ const AttrListPtr &PAL = AttrListPtr());
+ std::ostream &printType(std::ostream &Out, const Type *Ty,
+ bool isSigned = false,
+ const std::string &VariableName = "",
+ bool IgnoreName = false,
+ const AttrListPtr &PAL = AttrListPtr());
+ raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty,
+ bool isSigned,
+ const std::string &NameSoFar = "");
+ std::ostream &printSimpleType(std::ostream &Out, const Type *Ty,
+ bool isSigned,
+ const std::string &NameSoFar = "");
+
+ void printStructReturnPointerFunctionType(raw_ostream &Out,
+ const AttrListPtr &PAL,
+ const PointerType *Ty);
+
+ /// writeOperandDeref - Print the result of dereferencing the specified
+ /// operand with '*'. This is equivalent to printing '*' then using
+ /// writeOperand, but avoids excess syntax in some cases.
+ void writeOperandDeref(Value *Operand) {
+ if (isAddressExposed(Operand)) {
+ // Already something with an address exposed.
+ writeOperandInternal(Operand);
+ } else {
+ Out << "*(";
+ writeOperand(Operand);
+ Out << ")";
+ }
+ }
+
+ void writeOperand(Value *Operand, bool Static = false);
+ void writeInstComputationInline(Instruction &I);
+ void writeOperandInternal(Value *Operand, bool Static = false);
+ void writeOperandWithCast(Value* Operand, unsigned Opcode);
+ void writeOperandWithCast(Value* Operand, const ICmpInst &I);
+ bool writeInstructionCast(const Instruction &I);
+
+ void writeMemoryAccess(Value *Operand, const Type *OperandType,
+ bool IsVolatile, unsigned Alignment);
+
+ private :
+ std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
+
+ void lowerIntrinsics(Function &F);
+
+ void printModule(Module *M);
+ void printModuleTypes(const TypeSymbolTable &ST);
+ void printContainedStructs(const Type *Ty, std::set<const Type *> &);
+ void printFloatingPointConstants(Function &F);
+ void printFloatingPointConstants(const Constant *C);
+ void printFunctionSignature(const Function *F, bool Prototype);
+
+ void printFunction(Function &);
+ void printBasicBlock(BasicBlock *BB);
+ void printLoop(Loop *L);
+
+ void printCast(unsigned opcode, const Type *SrcTy, const Type *DstTy);
+ void printConstant(Constant *CPV, bool Static);
+ void printConstantWithCast(Constant *CPV, unsigned Opcode);
+ bool printConstExprCast(const ConstantExpr *CE, bool Static);
+ void printConstantArray(ConstantArray *CPA, bool Static);
+ void printConstantVector(ConstantVector *CV, bool Static);
+
+ /// isAddressExposed - Return true if the specified value's name needs to
+ /// have its address taken in order to get a C value of the correct type.
+ /// This happens for global variables, byval parameters, and direct allocas.
+ bool isAddressExposed(const Value *V) const {
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return ByValParams.count(A);
+ return isa<GlobalVariable>(V) || isDirectAlloca(V);
+ }
+
+ // isInlinableInst - Attempt to inline instructions into their uses to build
+ // trees as much as possible. To do this, we have to consistently decide
+ // what is acceptable to inline, so that variable declarations don't get
+ // printed and an extra copy of the expr is not emitted.
+ //
+ static bool isInlinableInst(const Instruction &I) {
+ // Always inline cmp instructions, even if they are shared by multiple
+ // expressions. GCC generates horrible code if we don't.
+ if (isa<CmpInst>(I))
+ return true;
+
+ // Must be an expression, must be used exactly once. If it is dead, we
+ // emit it inline where it would go.
+ if (I.getType() == Type::VoidTy || !I.hasOneUse() ||
+ isa<TerminatorInst>(I) || isa<CallInst>(I) || isa<PHINode>(I) ||
+ isa<LoadInst>(I) || isa<VAArgInst>(I) || isa<InsertElementInst>(I) ||
+ isa<InsertValueInst>(I))
+ // Don't inline a load across a store or other bad things!
+ return false;
+
+ // Must not be used in inline asm, extractelement, or shufflevector.
+ if (I.hasOneUse()) {
+ const Instruction &User = cast<Instruction>(*I.use_back());
+ if (isInlineAsm(User) || isa<ExtractElementInst>(User) ||
+ isa<ShuffleVectorInst>(User))
+ return false;
+ }
+
+      // Only inline an instruction if its use is in the same BB as the inst.
+ return I.getParent() == cast<Instruction>(I.use_back())->getParent();
+ }
+
+ // isDirectAlloca - Define fixed sized allocas in the entry block as direct
+ // variables which are accessed with the & operator. This causes GCC to
+ // generate significantly better code than to emit alloca calls directly.
+ //
+ static const AllocaInst *isDirectAlloca(const Value *V) {
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+      if (!AI) return 0;
+ if (AI->isArrayAllocation())
+ return 0; // FIXME: we can also inline fixed size array allocas!
+ if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock())
+ return 0;
+ return AI;
+ }
+
+ // isInlineAsm - Check if the instruction is a call to an inline asm chunk
+ static bool isInlineAsm(const Instruction& I) {
+ if (isa<CallInst>(&I) && isa<InlineAsm>(I.getOperand(0)))
+ return true;
+ return false;
+ }
+
+ // Instruction visitation functions
+ friend class InstVisitor<CWriter>;
+
+ void visitReturnInst(ReturnInst &I);
+ void visitBranchInst(BranchInst &I);
+ void visitSwitchInst(SwitchInst &I);
+ void visitInvokeInst(InvokeInst &I) {
+ assert(0 && "Lowerinvoke pass didn't work!");
+ }
+
+ void visitUnwindInst(UnwindInst &I) {
+ assert(0 && "Lowerinvoke pass didn't work!");
+ }
+ void visitUnreachableInst(UnreachableInst &I);
+
+ void visitPHINode(PHINode &I);
+ void visitBinaryOperator(Instruction &I);
+ void visitICmpInst(ICmpInst &I);
+ void visitFCmpInst(FCmpInst &I);
+
+ void visitCastInst (CastInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitCallInst (CallInst &I);
+ void visitInlineAsm(CallInst &I);
+ bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee);
+
+ void visitMallocInst(MallocInst &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitFreeInst (FreeInst &I);
+ void visitLoadInst (LoadInst &I);
+ void visitStoreInst (StoreInst &I);
+ void visitGetElementPtrInst(GetElementPtrInst &I);
+ void visitVAArgInst (VAArgInst &I);
+
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &SVI);
+
+ void visitInsertValueInst(InsertValueInst &I);
+ void visitExtractValueInst(ExtractValueInst &I);
+
+ void visitInstruction(Instruction &I) {
+ cerr << "C Writer does not know about " << I;
+ abort();
+ }
+
+ void outputLValue(Instruction *I) {
+ Out << " " << GetValueName(I) << " = ";
+ }
+
+ bool isGotoCodeNecessary(BasicBlock *From, BasicBlock *To);
+ void printPHICopiesForSuccessor(BasicBlock *CurBlock,
+ BasicBlock *Successor, unsigned Indent);
+ void printBranchToBlock(BasicBlock *CurBlock, BasicBlock *SuccBlock,
+ unsigned Indent);
+ void printGEPExpression(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, bool Static);
+
+ std::string GetValueName(const Value *Operand);
+ };
+}
+
+char CWriter::ID = 0;
+
+/// This method inserts names for any unnamed structure types that are used by
+/// the program, and removes names from structure types that are not used by the
+/// program.
+///
+bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
+ // Get a set of types that are used by the program...
+ std::set<const Type *> UT = getAnalysis<FindUsedTypes>().getTypes();
+
+ // Loop over the module symbol table, removing types from UT that are
+ // already named, and removing names for types that are not used.
+ //
+ TypeSymbolTable &TST = M.getTypeSymbolTable();
+ for (TypeSymbolTable::iterator TI = TST.begin(), TE = TST.end();
+ TI != TE; ) {
+ TypeSymbolTable::iterator I = TI++;
+
+ // If this isn't a struct or array type, remove it from our set of types
+ // to name. This simplifies emission later.
+ if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second) &&
+ !isa<ArrayType>(I->second)) {
+ TST.remove(I);
+ } else {
+ // If this is not used, remove it from the symbol table.
+ std::set<const Type *>::iterator UTI = UT.find(I->second);
+ if (UTI == UT.end())
+ TST.remove(I);
+ else
+ UT.erase(UTI); // Only keep one name for this type.
+ }
+ }
+
+ // UT now contains types that are not named. Loop over it, naming
+ // structure types.
+ //
+ bool Changed = false;
+ unsigned RenameCounter = 0;
+ for (std::set<const Type *>::const_iterator I = UT.begin(), E = UT.end();
+ I != E; ++I)
+ if (isa<StructType>(*I) || isa<ArrayType>(*I)) {
+ while (M.addTypeName("unnamed"+utostr(RenameCounter), *I))
+ ++RenameCounter;
+ Changed = true;
+ }
+
+
+ // Loop over all external functions and globals. If we have two with
+ // identical names, merge them.
+ // FIXME: This code should disappear when we don't allow values with the same
+ // names when they have different types!
+ std::map<std::string, GlobalValue*> ExtSymbols;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E;) {
+ Function *GV = I++;
+ if (GV->isDeclaration() && GV->hasName()) {
+ std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X
+ = ExtSymbols.insert(std::make_pair(GV->getName(), GV));
+ if (!X.second) {
+ // Found a conflict, replace this global with the previous one.
+ GlobalValue *OldGV = X.first->second;
+ GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType()));
+ GV->eraseFromParent();
+ Changed = true;
+ }
+ }
+ }
+ // Do the same for globals.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E;) {
+ GlobalVariable *GV = I++;
+ if (GV->isDeclaration() && GV->hasName()) {
+ std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X
+ = ExtSymbols.insert(std::make_pair(GV->getName(), GV));
+ if (!X.second) {
+ // Found a conflict, replace this global with the previous one.
+ GlobalValue *OldGV = X.first->second;
+ GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType()));
+ GV->eraseFromParent();
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+/// printStructReturnPointerFunctionType - This is like printType for a struct
+/// return type, except that instead of printing the type as void (*)(Struct*, ...)
+/// it prints the type as "Struct (*)(...)", for struct-return functions.
+void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
+ const AttrListPtr &PAL,
+ const PointerType *TheTy) {
+ const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
+ std::stringstream FunctionInnards;
+ FunctionInnards << " (*) (";
+ bool PrintedType = false;
+
+ FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end();
+ const Type *RetTy = cast<PointerType>(I->get())->getElementType();
+ unsigned Idx = 1;
+ for (++I, ++Idx; I != E; ++I, ++Idx) {
+ if (PrintedType)
+ FunctionInnards << ", ";
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(isa<PointerType>(ArgTy));
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
+ PrintedType = true;
+ }
+ if (FTy->isVarArg()) {
+ if (PrintedType)
+ FunctionInnards << ", ...";
+ } else if (!PrintedType) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ std::string tstr = FunctionInnards.str();
+ printType(Out, RetTy,
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr);
+}
+
+raw_ostream &
+CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
+ const std::string &NameSoFar) {
+ assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) &&
+ "Invalid type for printSimpleType");
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return Out << "void " << NameSoFar;
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits == 1)
+ return Out << "bool " << NameSoFar;
+ else if (NumBits <= 8)
+ return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
+ else if (NumBits <= 16)
+ return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar;
+ else if (NumBits <= 32)
+ return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
+ else if (NumBits <= 64)
+ return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
+ else {
+ assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
+ return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
+ }
+ }
+ case Type::FloatTyID: return Out << "float " << NameSoFar;
+ case Type::DoubleTyID: return Out << "double " << NameSoFar;
+ // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
+ // present matches host 'long double'.
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: return Out << "long double " << NameSoFar;
+
+ case Type::VectorTyID: {
+ const VectorType *VTy = cast<VectorType>(Ty);
+ return printSimpleType(Out, VTy->getElementType(), isSigned,
+ " __attribute__((vector_size(" +
+ utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
+ }
+
+ default:
+ cerr << "Unknown primitive type: " << *Ty << "\n";
+ abort();
+ }
+}
+
+std::ostream &
+CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned,
+ const std::string &NameSoFar) {
+ assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) &&
+ "Invalid type for printSimpleType");
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return Out << "void " << NameSoFar;
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits == 1)
+ return Out << "bool " << NameSoFar;
+ else if (NumBits <= 8)
+ return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
+ else if (NumBits <= 16)
+ return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar;
+ else if (NumBits <= 32)
+ return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
+ else if (NumBits <= 64)
+ return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
+ else {
+ assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
+ return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
+ }
+ }
+ case Type::FloatTyID: return Out << "float " << NameSoFar;
+ case Type::DoubleTyID: return Out << "double " << NameSoFar;
+ // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
+ // present matches host 'long double'.
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: return Out << "long double " << NameSoFar;
+
+ case Type::VectorTyID: {
+ const VectorType *VTy = cast<VectorType>(Ty);
+ return printSimpleType(Out, VTy->getElementType(), isSigned,
+ " __attribute__((vector_size(" +
+ utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
+ }
+
+ default:
+ cerr << "Unknown primitive type: " << *Ty << "\n";
+ abort();
+ }
+}
+
+// Pass the Type* and the variable name and this prints out the variable
+// declaration.
+//
+raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
+ bool isSigned, const std::string &NameSoFar,
+ bool IgnoreName, const AttrListPtr &PAL) {
+ if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) {
+ printSimpleType(Out, Ty, isSigned, NameSoFar);
+ return Out;
+ }
+
+ // Check to see if the type is named.
+ if (!IgnoreName || isa<OpaqueType>(Ty)) {
+ std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty);
+ if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar;
+ }
+
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType *FTy = cast<FunctionType>(Ty);
+ std::stringstream FunctionInnards;
+ FunctionInnards << " (" << NameSoFar << ") (";
+ unsigned Idx = 1;
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I) {
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(isa<PointerType>(ArgTy));
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ if (I != FTy->param_begin())
+ FunctionInnards << ", ";
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
+ ++Idx;
+ }
+ if (FTy->isVarArg()) {
+ if (FTy->getNumParams())
+ FunctionInnards << ", ...";
+ } else if (!FTy->getNumParams()) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ std::string tstr = FunctionInnards.str();
+ printType(Out, FTy->getReturnType(),
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr);
+ return Out;
+ }
+ case Type::StructTyID: {
+ const StructType *STy = cast<StructType>(Ty);
+ Out << NameSoFar + " {\n";
+ unsigned Idx = 0;
+ for (StructType::element_iterator I = STy->element_begin(),
+ E = STy->element_end(); I != E; ++I) {
+ Out << " ";
+ printType(Out, *I, false, "field" + utostr(Idx++));
+ Out << ";\n";
+ }
+ Out << '}';
+ if (STy->isPacked())
+ Out << " __attribute__ ((packed))";
+ return Out;
+ }
+
+ case Type::PointerTyID: {
+ const PointerType *PTy = cast<PointerType>(Ty);
+ std::string ptrName = "*" + NameSoFar;
+
+ if (isa<ArrayType>(PTy->getElementType()) ||
+ isa<VectorType>(PTy->getElementType()))
+ ptrName = "(" + ptrName + ")";
+
+ if (!PAL.isEmpty())
+ // Must be a function ptr cast!
+ return printType(Out, PTy->getElementType(), false, ptrName, true, PAL);
+ return printType(Out, PTy->getElementType(), false, ptrName);
+ }
+
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ unsigned NumElements = ATy->getNumElements();
+ if (NumElements == 0) NumElements = 1;
+ // Arrays are wrapped in structs to allow them to have normal
+ // value semantics (avoiding the array "decay").
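+    // e.g. an LLVM [4 x i32] prints as "<NameSoFar> { unsigned int array[4]; }".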
+ Out << NameSoFar << " { ";
+ printType(Out, ATy->getElementType(), false,
+ "array[" + utostr(NumElements) + "]");
+ return Out << "; }";
+ }
+
+ case Type::OpaqueTyID: {
+ static int Count = 0;
+ std::string TyName = "struct opaque_" + itostr(Count++);
+ assert(TypeNames.find(Ty) == TypeNames.end());
+ TypeNames[Ty] = TyName;
+ return Out << TyName << ' ' << NameSoFar;
+ }
+ default:
+ assert(0 && "Unhandled case in getTypeProps!");
+ abort();
+ }
+
+ return Out;
+}
+
+// Pass the Type* and the variable name and this prints out the variable
+// declaration.
+//
+std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
+ bool isSigned, const std::string &NameSoFar,
+ bool IgnoreName, const AttrListPtr &PAL) {
+ if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) {
+ printSimpleType(Out, Ty, isSigned, NameSoFar);
+ return Out;
+ }
+
+ // Check to see if the type is named.
+ if (!IgnoreName || isa<OpaqueType>(Ty)) {
+ std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty);
+ if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar;
+ }
+
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType *FTy = cast<FunctionType>(Ty);
+ std::stringstream FunctionInnards;
+ FunctionInnards << " (" << NameSoFar << ") (";
+ unsigned Idx = 1;
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I) {
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(isa<PointerType>(ArgTy));
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ if (I != FTy->param_begin())
+ FunctionInnards << ", ";
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
+ ++Idx;
+ }
+ if (FTy->isVarArg()) {
+ if (FTy->getNumParams())
+ FunctionInnards << ", ...";
+ } else if (!FTy->getNumParams()) {
+ FunctionInnards << "void";
+ }
+ FunctionInnards << ')';
+ std::string tstr = FunctionInnards.str();
+ printType(Out, FTy->getReturnType(),
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr);
+ return Out;
+ }
+ case Type::StructTyID: {
+ const StructType *STy = cast<StructType>(Ty);
+ Out << NameSoFar + " {\n";
+ unsigned Idx = 0;
+ for (StructType::element_iterator I = STy->element_begin(),
+ E = STy->element_end(); I != E; ++I) {
+ Out << " ";
+ printType(Out, *I, false, "field" + utostr(Idx++));
+ Out << ";\n";
+ }
+ Out << '}';
+ if (STy->isPacked())
+ Out << " __attribute__ ((packed))";
+ return Out;
+ }
+
+ case Type::PointerTyID: {
+ const PointerType *PTy = cast<PointerType>(Ty);
+ std::string ptrName = "*" + NameSoFar;
+
+ if (isa<ArrayType>(PTy->getElementType()) ||
+ isa<VectorType>(PTy->getElementType()))
+ ptrName = "(" + ptrName + ")";
+
+ if (!PAL.isEmpty())
+ // Must be a function ptr cast!
+ return printType(Out, PTy->getElementType(), false, ptrName, true, PAL);
+ return printType(Out, PTy->getElementType(), false, ptrName);
+ }
+
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ unsigned NumElements = ATy->getNumElements();
+ if (NumElements == 0) NumElements = 1;
+ // Arrays are wrapped in structs to allow them to have normal
+ // value semantics (avoiding the array "decay").
+ Out << NameSoFar << " { ";
+ printType(Out, ATy->getElementType(), false,
+ "array[" + utostr(NumElements) + "]");
+ return Out << "; }";
+ }
+
+ case Type::OpaqueTyID: {
+ static int Count = 0;
+ std::string TyName = "struct opaque_" + itostr(Count++);
+ assert(TypeNames.find(Ty) == TypeNames.end());
+ TypeNames[Ty] = TyName;
+ return Out << TyName << ' ' << NameSoFar;
+ }
+ default:
+ assert(0 && "Unhandled case in getTypeProps!");
+ abort();
+ }
+
+ return Out;
+}
+
+void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
+
+ // As a special case, print the array as a string if it is an array of
+ // i8 values.
+ //
+ const Type *ETy = CPA->getType()->getElementType();
+ bool isString = (ETy == Type::Int8Ty);
+
+ // Make sure the last character is a null char, as automatically added by C
+ if (isString && (CPA->getNumOperands() == 0 ||
+ !cast<Constant>(*(CPA->op_end()-1))->isNullValue()))
+ isString = false;
+
+ if (isString) {
+ Out << '\"';
+ // Keep track of whether the last number was a hexadecimal escape
+ bool LastWasHex = false;
+
+ // Do not include the last character, which we know is null
+ for (unsigned i = 0, e = CPA->getNumOperands()-1; i != e; ++i) {
+ unsigned char C = cast<ConstantInt>(CPA->getOperand(i))->getZExtValue();
+
+ // Print it out literally if it is a printable character. The only thing
+ // to be careful about is when the last letter output was a hex escape
+ // code, in which case we have to be careful not to print out hex digits
+ // explicitly (the C compiler thinks it is a continuation of the previous
+ // character, sheesh...)
+ //
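+ // For example (illustrative): the bytes {0x01, 'a', 0} are emitted as
+ // "\x01\x61" rather than "\x01a", which a C compiler would parse as the
+ // single hex escape '\x01a'.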
+ if (isprint(C) && (!LastWasHex || !isxdigit(C))) {
+ LastWasHex = false;
+ if (C == '"' || C == '\\')
+ Out << "\\" << (char)C;
+ else
+ Out << (char)C;
+ } else {
+ LastWasHex = false;
+ switch (C) {
+ case '\n': Out << "\\n"; break;
+ case '\t': Out << "\\t"; break;
+ case '\r': Out << "\\r"; break;
+ case '\v': Out << "\\v"; break;
+ case '\a': Out << "\\a"; break;
+ case '\"': Out << "\\\""; break;
+ case '\'': Out << "\\\'"; break;
+ default:
+ Out << "\\x";
+ Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'));
+ Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ LastWasHex = true;
+ break;
+ }
+ }
+ }
+ Out << '\"';
+ } else {
+ Out << '{';
+ if (CPA->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CPA->getOperand(0)), Static);
+ for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CPA->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+ }
+}
+
+void CWriter::printConstantVector(ConstantVector *CP, bool Static) {
+ Out << '{';
+ if (CP->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CP->getOperand(0)), Static);
+ for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CP->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+}
+
+// isFPCSafeToPrint - Returns true if we may assume that CFP may be written out
+// textually as a double (rather than as a reference to a stack-allocated
+// variable). We decide this by converting CFP to a string and back into a
+// double, and then checking whether the conversion results in a bit-equal
+// double to the original value of CFP. This depends on us and the target C
+// compiler agreeing on the conversion process (which is pretty likely since we
+// only deal in IEEE FP).
+//
+static bool isFPCSafeToPrint(const ConstantFP *CFP) {
+ bool ignored;
+ // Do long doubles in hex for now.
+ if (CFP->getType() != Type::FloatTy && CFP->getType() != Type::DoubleTy)
+ return false;
+ APFloat APF = APFloat(CFP->getValueAPF()); // copy
+ if (CFP->getType() == Type::FloatTy)
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
+ char Buffer[100];
+ sprintf(Buffer, "%a", APF.convertToDouble());
+ if (!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3))
+ return APF.bitwiseIsEqual(APFloat(atof(Buffer)));
+ return false;
+#else
+ std::string StrVal = ftostr(APF);
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like
+ // "Inf" or "NaN". Check that the string matches the "[-+]?[0-9]" regex.
+ if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9')))
+ // Reparse stringized version!
+ return APF.bitwiseIsEqual(APFloat(atof(StrVal.c_str())));
+ return false;
+#endif
+}
+
+/// Print out the casting for a cast operation. This does the double casting
+/// necessary for conversion to the destination type, if necessary.
+/// @brief Print a cast
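+/// For example (illustrative), a zext from i8 to i32 prints the cast pair
+/// "(unsigned int)(unsigned char)", forcing the value to the unsigned source
+/// type before widening it to the unsigned destination type.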
+void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
+ // Print the destination type cast
+ switch (opc) {
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::IntToPtr:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc: // For these the DstTy sign doesn't matter
+ Out << '(';
+ printType(Out, DstTy);
+ Out << ')';
+ break;
+ case Instruction::ZExt:
+ case Instruction::PtrToInt:
+ case Instruction::FPToUI: // For these, make sure we get an unsigned dest
+ Out << '(';
+ printSimpleType(Out, DstTy, false);
+ Out << ')';
+ break;
+ case Instruction::SExt:
+ case Instruction::FPToSI: // For these, make sure we get a signed dest
+ Out << '(';
+ printSimpleType(Out, DstTy, true);
+ Out << ')';
+ break;
+ default:
+ assert(0 && "Invalid cast opcode");
+ }
+
+ // Print the source type cast
+ switch (opc) {
+ case Instruction::UIToFP:
+ case Instruction::ZExt:
+ Out << '(';
+ printSimpleType(Out, SrcTy, false);
+ Out << ')';
+ break;
+ case Instruction::SIToFP:
+ case Instruction::SExt:
+ Out << '(';
+ printSimpleType(Out, SrcTy, true);
+ Out << ')';
+ break;
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ // Avoid "cast to pointer from integer of different size" warnings
+ Out << "(unsigned long)";
+ break;
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
+ break; // These don't need a source cast.
+ default:
+ assert(0 && "Invalid cast opcode");
+ break;
+ }
+}
+
+// printConstant - The LLVM Constant to C Constant converter.
+void CWriter::printConstant(Constant *CPV, bool Static) {
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
+ switch (CE->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ Out << "(";
+ printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType());
+ if (CE->getOpcode() == Instruction::SExt &&
+ CE->getOperand(0)->getType() == Type::Int1Ty) {
+ // Make sure we really sext from bool here by subtracting from 0
+ Out << "0-";
+ }
+ printConstant(CE->getOperand(0), Static);
+ if (CE->getType() == Type::Int1Ty &&
+ (CE->getOpcode() == Instruction::Trunc ||
+ CE->getOpcode() == Instruction::FPToUI ||
+ CE->getOpcode() == Instruction::FPToSI ||
+ CE->getOpcode() == Instruction::PtrToInt)) {
+ // Make sure we really truncate to bool here by anding with 1
+ Out << "&1u";
+ }
+ Out << ')';
+ return;
+
+ case Instruction::GetElementPtr:
+ Out << "(";
+ printGEPExpression(CE->getOperand(0), gep_type_begin(CPV),
+ gep_type_end(CPV), Static);
+ Out << ")";
+ return;
+ case Instruction::Select:
+ Out << '(';
+ printConstant(CE->getOperand(0), Static);
+ Out << '?';
+ printConstant(CE->getOperand(1), Static);
+ Out << ':';
+ printConstant(CE->getOperand(2), Static);
+ Out << ')';
+ return;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ {
+ Out << '(';
+ bool NeedsClosingParens = printConstExprCast(CE, Static);
+ printConstantWithCast(CE->getOperand(0), CE->getOpcode());
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Out << " + "; break;
+ case Instruction::Sub: Out << " - "; break;
+ case Instruction::Mul: Out << " * "; break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: Out << " % "; break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: Out << " / "; break;
+ case Instruction::And: Out << " & "; break;
+ case Instruction::Or: Out << " | "; break;
+ case Instruction::Xor: Out << " ^ "; break;
+ case Instruction::Shl: Out << " << "; break;
+ case Instruction::LShr:
+ case Instruction::AShr: Out << " >> "; break;
+ case Instruction::ICmp:
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << " == "; break;
+ case ICmpInst::ICMP_NE: Out << " != "; break;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT: Out << " < "; break;
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_ULE: Out << " <= "; break;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT: Out << " > "; break;
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGE: Out << " >= "; break;
+ default: assert(0 && "Illegal ICmp predicate");
+ }
+ break;
+ default: assert(0 && "Illegal opcode here!");
+ }
+ printConstantWithCast(CE->getOperand(1), CE->getOpcode());
+ if (NeedsClosingParens)
+ Out << "))";
+ Out << ')';
+ return;
+ }
+ case Instruction::FCmp: {
+ Out << '(';
+ bool NeedsClosingParens = printConstExprCast(CE, Static);
+ if (CE->getPredicate() == FCmpInst::FCMP_FALSE)
+ Out << "0";
+ else if (CE->getPredicate() == FCmpInst::FCMP_TRUE)
+ Out << "1";
+ else {
+ const char* op = 0;
+ switch (CE->getPredicate()) {
+ default: assert(0 && "Illegal FCmp predicate");
+ case FCmpInst::FCMP_ORD: op = "ord"; break;
+ case FCmpInst::FCMP_UNO: op = "uno"; break;
+ case FCmpInst::FCMP_UEQ: op = "ueq"; break;
+ case FCmpInst::FCMP_UNE: op = "une"; break;
+ case FCmpInst::FCMP_ULT: op = "ult"; break;
+ case FCmpInst::FCMP_ULE: op = "ule"; break;
+ case FCmpInst::FCMP_UGT: op = "ugt"; break;
+ case FCmpInst::FCMP_UGE: op = "uge"; break;
+ case FCmpInst::FCMP_OEQ: op = "oeq"; break;
+ case FCmpInst::FCMP_ONE: op = "one"; break;
+ case FCmpInst::FCMP_OLT: op = "olt"; break;
+ case FCmpInst::FCMP_OLE: op = "ole"; break;
+ case FCmpInst::FCMP_OGT: op = "ogt"; break;
+ case FCmpInst::FCMP_OGE: op = "oge"; break;
+ }
+ Out << "llvm_fcmp_" << op << "(";
+ printConstantWithCast(CE->getOperand(0), CE->getOpcode());
+ Out << ", ";
+ printConstantWithCast(CE->getOperand(1), CE->getOpcode());
+ Out << ")";
+ }
+ if (NeedsClosingParens)
+ Out << "))";
+ Out << ')';
+ return;
+ }
+ default:
+ cerr << "CWriter Error: Unhandled constant expression: "
+ << *CE << "\n";
+ abort();
+ }
+ } else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType()) {
+ Out << "((";
+ printType(Out, CPV->getType()); // sign doesn't matter
+ Out << ")/*UNDEF*/";
+ if (!isa<VectorType>(CPV->getType())) {
+ Out << "0)";
+ } else {
+ Out << "{})";
+ }
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+ const Type* Ty = CI->getType();
+ if (Ty == Type::Int1Ty)
+ Out << (CI->getZExtValue() ? '1' : '0');
+ else if (Ty == Type::Int32Ty)
+ Out << CI->getZExtValue() << 'u';
+ else if (Ty->getPrimitiveSizeInBits() > 32)
+ Out << CI->getZExtValue() << "ull";
+ else {
+ Out << "((";
+ printSimpleType(Out, Ty, false) << ')';
+ if (CI->isMinValue(true))
+ Out << CI->getZExtValue() << 'u';
+ else
+ Out << CI->getSExtValue();
+ Out << ')';
+ }
+ return;
+ }
+
+ switch (CPV->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::X86_FP80TyID:
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID: {
+ ConstantFP *FPC = cast<ConstantFP>(CPV);
+ std::map<const ConstantFP*, unsigned>::iterator I = FPConstantMap.find(FPC);
+ if (I != FPConstantMap.end()) {
+ // Because of FP precision problems we must load from a stack allocated
+ // value that holds the value in hex.
+ Out << "(*(" << (FPC->getType() == Type::FloatTy ? "float" :
+ FPC->getType() == Type::DoubleTy ? "double" :
+ "long double")
+ << "*)&FPConstant" << I->second << ')';
+ } else {
+ double V;
+ if (FPC->getType() == Type::FloatTy)
+ V = FPC->getValueAPF().convertToFloat();
+ else if (FPC->getType() == Type::DoubleTy)
+ V = FPC->getValueAPF().convertToDouble();
+ else {
+ // Long double. Convert the number to double, discarding precision.
+ // This is not awesome, but it at least makes the CBE output somewhat
+ // useful.
+ APFloat Tmp = FPC->getValueAPF();
+ bool LosesInfo;
+ Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo);
+ V = Tmp.convertToDouble();
+ }
+
+ if (IsNAN(V)) {
+ // The value is NaN
+
+ // FIXME the actual NaN bits should be emitted.
+ // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN,
+ // it's 0x7ff4.
+ const unsigned long QuietNaN = 0x7ff8UL;
+ //const unsigned long SignalNaN = 0x7ff4UL;
+
+ // We need to grab the first part of the FP #
+ char Buffer[100];
+
+ uint64_t ll = DoubleToBits(V);
+ sprintf(Buffer, "0x%llx", static_cast<long long>(ll));
+
+ std::string Num(&Buffer[0], &Buffer[6]);
+ unsigned long Val = strtoul(Num.c_str(), 0, 16);
+
+ if (FPC->getType() == Type::FloatTy)
+ Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\""
+ << Buffer << "\") /*nan*/ ";
+ else
+ Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\""
+ << Buffer << "\") /*nan*/ ";
+ } else if (IsInf(V)) {
+ // The value is Inf
+ if (V < 0) Out << '-';
+ Out << "LLVM_INF" << (FPC->getType() == Type::FloatTy ? "F" : "")
+ << " /*inf*/ ";
+ } else {
+ std::string Num;
+#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
+ // Print out the constant as a floating point number.
+ char Buffer[100];
+ sprintf(Buffer, "%a", V);
+ Num = Buffer;
+#else
+ Num = ftostr(FPC->getValueAPF());
+#endif
+ Out << Num;
+ }
+ }
+ break;
+ }
+
+ case Type::ArrayTyID:
+ // Use C99 compound expression literal initializer syntax.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ Out << "{ "; // Arrays are wrapped in struct types.
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) {
+ printConstantArray(CA, Static);
+ } else {
+ assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
+ const ArrayType *AT = cast<ArrayType>(CPV->getType());
+ Out << '{';
+ if (AT->getNumElements()) {
+ Out << ' ';
+ Constant *CZ = Constant::getNullValue(AT->getElementType());
+ printConstant(CZ, Static);
+ for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(CZ, Static);
+ }
+ }
+ Out << " }";
+ }
+ Out << " }"; // Arrays are wrapped in struct types.
+ break;
+
+ case Type::VectorTyID:
+ // Use C99 compound expression literal initializer syntax.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) {
+ printConstantVector(CV, Static);
+ } else {
+ assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
+ const VectorType *VT = cast<VectorType>(CPV->getType());
+ Out << "{ ";
+ Constant *CZ = Constant::getNullValue(VT->getElementType());
+ printConstant(CZ, Static);
+ for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(CZ, Static);
+ }
+ Out << " }";
+ }
+ break;
+
+ case Type::StructTyID:
+ // Use C99 compound expression literal initializer syntax.
+ if (!Static) {
+ Out << "(";
+ printType(Out, CPV->getType());
+ Out << ")";
+ }
+ if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
+ const StructType *ST = cast<StructType>(CPV->getType());
+ Out << '{';
+ if (ST->getNumElements()) {
+ Out << ' ';
+ printConstant(Constant::getNullValue(ST->getElementType(0)), Static);
+ for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ printConstant(Constant::getNullValue(ST->getElementType(i)), Static);
+ }
+ }
+ Out << " }";
+ } else {
+ Out << '{';
+ if (CPV->getNumOperands()) {
+ Out << ' ';
+ printConstant(cast<Constant>(CPV->getOperand(0)), Static);
+ for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ printConstant(cast<Constant>(CPV->getOperand(i)), Static);
+ }
+ }
+ Out << " }";
+ }
+ break;
+
+ case Type::PointerTyID:
+ if (isa<ConstantPointerNull>(CPV)) {
+ Out << "((";
+ printType(Out, CPV->getType()); // sign doesn't matter
+ Out << ")/*NULL*/0)";
+ break;
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) {
+ writeOperand(GV, Static);
+ break;
+ }
+ // FALL THROUGH
+ default:
+ cerr << "Unknown constant type: " << *CPV << "\n";
+ abort();
+ }
+}
+
+// Some constant expressions need to be casted back to the original types
+// because their operands were casted to the expected type. This function takes
+// care of detecting that case and printing the cast for the ConstantExpr.
+bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
+ bool NeedsExplicitCast = false;
+ const Type *Ty = CE->getOperand(0)->getType();
+ bool TypeIsSigned = false;
+ switch (CE->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ if (!Ty->isIntOrIntVector()) break;
+ // FALL THROUGH
+ case Instruction::LShr:
+ case Instruction::URem:
+ case Instruction::UDiv: NeedsExplicitCast = true; break;
+ case Instruction::AShr:
+ case Instruction::SRem:
+ case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break;
+ case Instruction::SExt:
+ Ty = CE->getType();
+ NeedsExplicitCast = true;
+ TypeIsSigned = true;
+ break;
+ case Instruction::ZExt:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ Ty = CE->getType();
+ NeedsExplicitCast = true;
+ break;
+ default: break;
+ }
+ if (NeedsExplicitCast) {
+ Out << "((";
+ if (Ty->isInteger() && Ty != Type::Int1Ty)
+ printSimpleType(Out, Ty, TypeIsSigned);
+ else
+ printType(Out, Ty); // not integer, sign doesn't matter
+ Out << ")(";
+ }
+ return NeedsExplicitCast;
+}
+
+// Print a constant assuming that it is the operand for a given Opcode. The
+// opcodes that care about sign need to cast their operands to the expected
+// type before the operation proceeds. This function does the casting.
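+// For example (illustrative), an i32 constant operand of a udiv is printed
+// roughly as "((unsigned int)C)", while the same operand of an sdiv is
+// printed roughly as "((int)C)".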
+void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
+
+ // Extract the operand's type, we'll need it.
+ const Type* OpTy = CPV->getType();
+
+ // Indicate whether to do the cast or not.
+ bool shouldCast = false;
+ bool typeIsSigned = false;
+
+ // Based on the Opcode for which this Constant is being written, determine
+ // the new type to which the operand should be casted by setting the value
+ // of OpTy. If we change OpTy, also set shouldCast to true so it gets
+ // casted below.
+ switch (Opcode) {
+ default:
+ // for most instructions, it doesn't matter
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ if (!OpTy->isIntOrIntVector()) break;
+ // FALL THROUGH
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ shouldCast = true;
+ break;
+ case Instruction::AShr:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ shouldCast = true;
+ typeIsSigned = true;
+ break;
+ }
+
+ // Write out the casted constant if we should, otherwise just write the
+ // operand.
+ if (shouldCast) {
+ Out << "((";
+ printSimpleType(Out, OpTy, typeIsSigned);
+ Out << ")";
+ printConstant(CPV, false);
+ Out << ")";
+ } else
+ printConstant(CPV, false);
+}
+
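+/// GetValueName - Return a legal C identifier for the specified value. Named
+/// non-global values are prefixed with "llvm_cbe_" and any character outside
+/// [A-Za-z0-9_] is escaped as a _XX_ hex sequence; for example (illustrative),
+/// a local named "a.b" becomes "llvm_cbe_a_2e_b". Globals and unnamed values
+/// are handed to the Mangler instead.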
+std::string CWriter::GetValueName(const Value *Operand) {
+ std::string Name;
+
+ if (!isa<GlobalValue>(Operand) && Operand->getName() != "") {
+ std::string VarName;
+
+ Name = Operand->getName();
+ VarName.reserve(Name.capacity());
+
+ for (std::string::iterator I = Name.begin(), E = Name.end();
+ I != E; ++I) {
+ char ch = *I;
+
+ if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
+ (ch >= '0' && ch <= '9') || ch == '_')) {
+ char buffer[5];
+ sprintf(buffer, "_%x_", (unsigned char)ch); // avoid sign-extension overflowing the buffer
+ VarName += buffer;
+ } else
+ VarName += ch;
+ }
+
+ Name = "llvm_cbe_" + VarName;
+ } else {
+ Name = Mang->getValueName(Operand);
+ }
+
+ return Name;
+}
+
+/// writeInstComputationInline - Emit the computation for the specified
+/// instruction inline, with no destination provided.
+void CWriter::writeInstComputationInline(Instruction &I) {
+ // If this is a non-trivial bool computation, make sure to truncate down to
+ // a 1 bit value. This is important because we want "add i1 x, y" to return
+ // "0" when x and y are true, not "2" for example.
+ bool NeedBoolTrunc = false;
+ if (I.getType() == Type::Int1Ty && !isa<ICmpInst>(I) && !isa<FCmpInst>(I))
+ NeedBoolTrunc = true;
+
+ if (NeedBoolTrunc)
+ Out << "((";
+
+ visit(I);
+
+ if (NeedBoolTrunc)
+ Out << ")&1)";
+}
+
+
+void CWriter::writeOperandInternal(Value *Operand, bool Static) {
+ if (Instruction *I = dyn_cast<Instruction>(Operand))
+ // Should we inline this instruction to build a tree?
+ if (isInlinableInst(*I) && !isDirectAlloca(I)) {
+ Out << '(';
+ writeInstComputationInline(*I);
+ Out << ')';
+ return;
+ }
+
+ Constant* CPV = dyn_cast<Constant>(Operand);
+
+ if (CPV && !isa<GlobalValue>(CPV))
+ printConstant(CPV, Static);
+ else
+ Out << GetValueName(Operand);
+}
+
+void CWriter::writeOperand(Value *Operand, bool Static) {
+ bool isAddressImplicit = isAddressExposed(Operand);
+ if (isAddressImplicit)
+ Out << "(&"; // Global variables are referenced as their addresses by llvm
+
+ writeOperandInternal(Operand, Static);
+
+ if (isAddressImplicit)
+ Out << ')';
+}
+
+// Some instructions need to have their result value casted back to the
+// original types because their operands were casted to the expected type.
+// This function takes care of detecting that case and printing the cast
+// for the Instruction.
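+// For example (illustrative), the whole expression for an i32 udiv is wrapped
+// roughly as "((unsigned int)(x / y))", so the C-level division is performed
+// on unsigned values regardless of how the result is later used.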
+bool CWriter::writeInstructionCast(const Instruction &I) {
+ const Type *Ty = I.getOperand(0)->getType();
+ switch (I.getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ if (!Ty->isIntOrIntVector()) break;
+ // FALL THROUGH
+ case Instruction::LShr:
+ case Instruction::URem:
+ case Instruction::UDiv:
+ Out << "((";
+ printSimpleType(Out, Ty, false);
+ Out << ")(";
+ return true;
+ case Instruction::AShr:
+ case Instruction::SRem:
+ case Instruction::SDiv:
+ Out << "((";
+ printSimpleType(Out, Ty, true);
+ Out << ")(";
+ return true;
+ default: break;
+ }
+ return false;
+}
+
+// Write the operand with a cast to another type based on the Opcode being used.
+// This will be used in cases where an instruction has specific type
+// requirements (usually signedness) for its operands.
+void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
+
+ // Extract the operand's type, we'll need it.
+ const Type* OpTy = Operand->getType();
+
+ // Indicate whether to do the cast or not.
+ bool shouldCast = false;
+
+ // Indicate whether the cast should be to a signed type or not.
+ bool castIsSigned = false;
+
+ // Based on the Opcode for which this Operand is being written, determine
+ // the new type to which the operand should be casted by setting the value
+ // of OpTy. If we change OpTy, also set shouldCast to true.
+ switch (Opcode) {
+ default:
+ // for most instructions, it doesn't matter
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ // We need to cast integer arithmetic so that it is always performed
+ // as unsigned, to avoid undefined behavior on overflow.
+ if (!OpTy->isIntOrIntVector()) break;
+ // FALL THROUGH
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::URem: // Cast to unsigned first
+ shouldCast = true;
+ castIsSigned = false;
+ break;
+ case Instruction::GetElementPtr:
+ case Instruction::AShr:
+ case Instruction::SDiv:
+ case Instruction::SRem: // Cast to signed first
+ shouldCast = true;
+ castIsSigned = true;
+ break;
+ }
+
+ // Write out the casted operand if we should, otherwise just write the
+ // operand.
+ if (shouldCast) {
+ Out << "((";
+ printSimpleType(Out, OpTy, castIsSigned);
+ Out << ")";
+ writeOperand(Operand);
+ Out << ")";
+ } else
+ writeOperand(Operand);
+}
+
+// Write the operand with a cast to another type based on the icmp predicate
+// being used.
+void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
+ // This has to do a cast to ensure the operand has the right signedness.
+ // Also, if the operand is a pointer, we make sure to cast to an integer when
+ // doing the comparison both for signedness and so that the C compiler doesn't
+ // optimize things like "p < NULL" to false (p may, for example, contain an
+ // integer value).
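+ // For example (illustrative), an unsigned compare such as "icmp ult" of two
+ // i32* operands on a 64-bit target prints each operand roughly as
+ // "((unsigned long long)p)", so the comparison is done on pointer-sized
+ // integers.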
+ bool shouldCast = Cmp.isRelational();
+
+ // Write out the casted operand if we should, otherwise just write the
+ // operand.
+ if (!shouldCast) {
+ writeOperand(Operand);
+ return;
+ }
+
+ // Should this be a signed comparison? If so, convert to signed.
+ bool castIsSigned = Cmp.isSignedPredicate();
+
+ // If the operand was a pointer, convert to a large integer type.
+ const Type* OpTy = Operand->getType();
+ if (isa<PointerType>(OpTy))
+ OpTy = TD->getIntPtrType();
+
+ Out << "((";
+ printSimpleType(Out, OpTy, castIsSigned);
+ Out << ")";
+ writeOperand(Operand);
+ Out << ")";
+}
+
+// generateCompilerSpecificCode - This is where we add conditional compilation
+// directives to cater to specific compilers as need be.
+//
+static void generateCompilerSpecificCode(raw_ostream& Out,
+ const TargetData *TD) {
+ // Alloca is hard to get, and we don't want to include stdlib.h here.
+ Out << "/* get a declaration for alloca */\n"
+ << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n"
+ << "#define alloca(x) __builtin_alloca((x))\n"
+ << "#define _alloca(x) __builtin_alloca((x))\n"
+ << "#elif defined(__APPLE__)\n"
+ << "extern void *__builtin_alloca(unsigned long);\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#define longjmp _longjmp\n"
+ << "#define setjmp _setjmp\n"
+ << "#elif defined(__sun__)\n"
+ << "#if defined(__sparcv9)\n"
+ << "extern void *__builtin_alloca(unsigned long);\n"
+ << "#else\n"
+ << "extern void *__builtin_alloca(unsigned int);\n"
+ << "#endif\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)\n"
+ << "#define alloca(x) __builtin_alloca(x)\n"
+ << "#elif defined(_MSC_VER)\n"
+ << "#define inline _inline\n"
+ << "#define alloca(x) _alloca(x)\n"
+ << "#else\n"
+ << "#include <alloca.h>\n"
+ << "#endif\n\n";
+
+ // We output GCC specific attributes to preserve 'linkonce'ness on globals.
+ // If we aren't being compiled with GCC, just drop these attributes.
+ Out << "#ifndef __GNUC__ /* Can only support \"linkonce\" vars with GCC */\n"
+ << "#define __attribute__(X)\n"
+ << "#endif\n\n";
+
+ // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))".
+ Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
+ << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n"
+ << "#elif defined(__GNUC__)\n"
+ << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n"
+ << "#else\n"
+ << "#define __EXTERNAL_WEAK__\n"
+ << "#endif\n\n";
+
+ // For now, turn off the weak linkage attribute on Mac OS X. (See above.)
+ Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
+ << "#define __ATTRIBUTE_WEAK__\n"
+ << "#elif defined(__GNUC__)\n"
+ << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n"
+ << "#else\n"
+ << "#define __ATTRIBUTE_WEAK__\n"
+ << "#endif\n\n";
+
+ // Add hidden visibility support. FIXME: APPLE_CC?
+ Out << "#if defined(__GNUC__)\n"
+ << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
+ << "#endif\n\n";
+
+ // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise
+ // From the GCC documentation:
+ //
+ // double __builtin_nan (const char *str)
+ //
+ // This is an implementation of the ISO C99 function nan.
+ //
+ // Since ISO C99 defines this function in terms of strtod, which we do
+ // not implement, a description of the parsing is in order. The string is
+ // parsed as by strtol; that is, the base is recognized by leading 0 or
+ // 0x prefixes. The number parsed is placed in the significand such that
+ // the least significant bit of the number is at the least significant
+ // bit of the significand. The number is truncated to fit the significand
+ // field provided. The significand is forced to be a quiet NaN.
+ //
+ // This function, if given a string literal, is evaluated early enough
+ // that it is considered a compile-time constant.
+ //
+ // float __builtin_nanf (const char *str)
+ //
+ // Similar to __builtin_nan, except the return type is float.
+ //
+ // double __builtin_inf (void)
+ //
+ // Similar to __builtin_huge_val, except a warning is generated if the
+ // target floating-point format does not support infinities. This
+ // function is suitable for implementing the ISO C99 macro INFINITY.
+ //
+ // float __builtin_inff (void)
+ //
+ // Similar to __builtin_inf, except the return type is float.
+ Out << "#ifdef __GNUC__\n"
+ << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n"
+ << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n"
+ << "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n"
+ << "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n"
+ << "#define LLVM_INF __builtin_inf() /* Double */\n"
+ << "#define LLVM_INFF __builtin_inff() /* Float */\n"
+ << "#define LLVM_PREFETCH(addr,rw,locality) "
+ "__builtin_prefetch(addr,rw,locality)\n"
+ << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n"
+ << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n"
+ << "#define LLVM_ASM __asm__\n"
+ << "#else\n"
+ << "#define LLVM_NAN(NanStr) ((double)0.0) /* Double */\n"
+ << "#define LLVM_NANF(NanStr) 0.0F /* Float */\n"
+ << "#define LLVM_NANS(NanStr) ((double)0.0) /* Double */\n"
+ << "#define LLVM_NANSF(NanStr) 0.0F /* Float */\n"
+ << "#define LLVM_INF ((double)0.0) /* Double */\n"
+ << "#define LLVM_INFF 0.0F /* Float */\n"
+ << "#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n"
+ << "#define __ATTRIBUTE_CTOR__\n"
+ << "#define __ATTRIBUTE_DTOR__\n"
+ << "#define LLVM_ASM(X)\n"
+ << "#endif\n\n";
+
+ Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n"
+ << "#define __builtin_stack_save() 0 /* not implemented */\n"
+ << "#define __builtin_stack_restore(X) /* noop */\n"
+ << "#endif\n\n";
+
+ // Output typedefs for 128-bit integers. If these are needed with a
+ // 32-bit target or with a C compiler that doesn't support mode(TI),
+ // more drastic measures will be needed.
+ Out << "#if __GNUC__ && __LP64__ /* 128-bit integer types */\n"
+ << "typedef int __attribute__((mode(TI))) llvmInt128;\n"
+ << "typedef unsigned __attribute__((mode(TI))) llvmUInt128;\n"
+ << "#endif\n\n";
+
+ // Output target-specific code that should be inserted into main.
+ Out << "#define CODE_FOR_MAIN() /* Any target-specific code for main()*/\n";
+}
+
+/// FindStaticTors - Given a static ctor/dtor list, unpack its contents into
+/// the StaticTors set.
+static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!InitList) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
+
+ if (CS->getOperand(1)->isNullValue())
+ return; // Found a null terminator, exit printing.
+ Constant *FP = CS->getOperand(1);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
+ if (CE->isCast())
+ FP = CE->getOperand(0);
+ if (Function *F = dyn_cast<Function>(FP))
+ StaticTors.insert(F);
+ }
+}
+
+enum SpecialGlobalClass {
+ NotSpecial = 0,
+ GlobalCtors, GlobalDtors,
+ NotPrinted
+};
+
+/// getGlobalVariableClass - If this is a global that is specially recognized
+/// by LLVM, return a code that indicates how we should handle it.
+static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) {
+ // If this is a global ctors/dtors list, handle it now.
+ if (GV->hasAppendingLinkage() && GV->use_empty()) {
+ if (GV->getName() == "llvm.global_ctors")
+ return GlobalCtors;
+ else if (GV->getName() == "llvm.global_dtors")
+ return GlobalDtors;
+ }
+
+ // Otherwise, if it is other metadata, don't print it. This catches things
+ // like debug information.
+ if (GV->getSection() == "llvm.metadata")
+ return NotPrinted;
+
+ return NotSpecial;
+}
+
+
+bool CWriter::doInitialization(Module &M) {
+ // Initialize
+ TheModule = &M;
+
+ TD = new TargetData(&M);
+ IL = new IntrinsicLowering(*TD);
+ IL->AddPrototypes(M);
+
+ // Ensure that all structure types have names...
+ Mang = new Mangler(M);
+ Mang->markCharUnacceptable('.');
+
+ // Keep track of which functions are static ctors/dtors so they can have
+ // an attribute added to their prototypes.
+ std::set<Function*> StaticCtors, StaticDtors;
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ switch (getGlobalVariableClass(I)) {
+ default: break;
+ case GlobalCtors:
+ FindStaticTors(I, StaticCtors);
+ break;
+ case GlobalDtors:
+ FindStaticTors(I, StaticDtors);
+ break;
+ }
+ }
+
+ // get declaration for alloca
+ Out << "/* Provide Declarations */\n";
+ Out << "#include <stdarg.h>\n"; // Varargs support
+ Out << "#include <setjmp.h>\n"; // Unwind support
+ generateCompilerSpecificCode(Out, TD);
+
+ // Provide a definition for `bool' if not compiling with a C++ compiler.
+ Out << "\n"
+ << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n"
+
+ << "\n\n/* Support for floating point constants */\n"
+ << "typedef unsigned long long ConstantDoubleTy;\n"
+ << "typedef unsigned int ConstantFloatTy;\n"
+ << "typedef struct { unsigned long long f1; unsigned short f2; "
+ "unsigned short pad[3]; } ConstantFP80Ty;\n"
+ // This is used for both kinds of 128-bit long double; meaning differs.
+ << "typedef struct { unsigned long long f1; unsigned long long f2; }"
+ " ConstantFP128Ty;\n"
+ << "\n\n/* Global Declarations */\n";
+
+ // First output all the declarations for the program, because C requires
+ // Functions & globals to be declared before they are used.
+ //
+
+ // Loop over the symbol table, emitting all named constants...
+ printModuleTypes(M.getTypeSymbolTable());
+
+ // Global variable declarations...
+ if (!M.global_empty()) {
+ Out << "\n/* External Global Variable Declarations */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+
+ if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() ||
+ I->hasCommonLinkage())
+ Out << "extern ";
+ else if (I->hasDLLImportLinkage())
+ Out << "__declspec(dllimport) ";
+ else
+ continue; // Internal Global
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false, GetValueName(I));
+
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ Out << ";\n";
+ }
+ }
+
+ // Function declarations
+ Out << "\n/* Function Declarations */\n";
+ Out << "double fmod(double, double);\n"; // Support for FP rem
+ Out << "float fmodf(float, float);\n";
+ Out << "long double fmodl(long double, long double);\n";
+
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ // Don't print declarations for intrinsic functions.
+ if (!I->isIntrinsic() && I->getName() != "setjmp" &&
+ I->getName() != "longjmp" && I->getName() != "_setjmp") {
+ if (I->hasExternalWeakLinkage())
+ Out << "extern ";
+ printFunctionSignature(I, true);
+ if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (StaticCtors.count(I))
+ Out << " __ATTRIBUTE_CTOR__";
+ if (StaticDtors.count(I))
+ Out << " __ATTRIBUTE_DTOR__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ if (I->hasName() && I->getName()[0] == 1)
+ Out << " LLVM_ASM(\"" << I->getName().c_str()+1 << "\")";
+
+ Out << ";\n";
+ }
+ }
+
+ // Output the global variable declarations
+ if (!M.global_empty()) {
+ Out << "\n\n/* Global Variable Declarations */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Ignore special globals, such as debug info.
+ if (getGlobalVariableClass(I))
+ continue;
+
+ if (I->hasLocalLinkage())
+ Out << "static ";
+ else
+ Out << "extern ";
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false,
+ GetValueName(I));
+
+ if (I->hasLinkOnceLinkage())
+ Out << " __attribute__((common))";
+ else if (I->hasCommonLinkage()) // FIXME is this right?
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasWeakLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+ Out << ";\n";
+ }
+ }
+
+ // Output the global variable definitions and contents...
+ if (!M.global_empty()) {
+ Out << "\n\n/* Global Variable Definitions and Initialization */\n";
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ if (!I->isDeclaration()) {
+ // Ignore special globals, such as debug info.
+ if (getGlobalVariableClass(I))
+ continue;
+
+ if (I->hasLocalLinkage())
+ Out << "static ";
+ else if (I->hasDLLImportLinkage())
+ Out << "__declspec(dllimport) ";
+ else if (I->hasDLLExportLinkage())
+ Out << "__declspec(dllexport) ";
+
+ // Thread Local Storage
+ if (I->isThreadLocal())
+ Out << "__thread ";
+
+ printType(Out, I->getType()->getElementType(), false,
+ GetValueName(I));
+ if (I->hasLinkOnceLinkage())
+ Out << " __attribute__((common))";
+ else if (I->hasWeakLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ else if (I->hasCommonLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ // If the initializer is not null, emit the initializer. If it is null,
+ // we try to avoid emitting large amounts of zeros. The problem with
+ // this, however, occurs when the variable has weak linkage. In this
+ // case, the assembler will complain about the variable being both weak
+ // and common, so we disable this optimization.
+ // FIXME common linkage should avoid this problem.
+ if (!I->getInitializer()->isNullValue()) {
+ Out << " = " ;
+ writeOperand(I->getInitializer(), true);
+ } else if (I->hasWeakLinkage()) {
+ // We have to specify an initializer, but it doesn't have to be
+ // complete. If the value is an aggregate, print out { 0 }, and let
+ // the compiler figure out the rest of the zeros.
+ Out << " = " ;
+ if (isa<StructType>(I->getInitializer()->getType()) ||
+ isa<VectorType>(I->getInitializer()->getType())) {
+ Out << "{ 0 }";
+ } else if (isa<ArrayType>(I->getInitializer()->getType())) {
+ // As with structs and vectors, but with an extra set of braces
+ // because arrays are wrapped in structs.
+ Out << "{ { 0 } }";
+ } else {
+ // Just print it out normally.
+ writeOperand(I->getInitializer(), true);
+ }
+ }
+ Out << ";\n";
+ }
+ }
+
+ if (!M.empty())
+ Out << "\n\n/* Function Bodies */\n";
+
+ // Emit some helper functions for dealing with FCMP instruction's
+ // predicates
+ Out << "static inline int llvm_fcmp_ord(double X, double Y) { ";
+ Out << "return X == X && Y == Y; }\n";
+ Out << "static inline int llvm_fcmp_uno(double X, double Y) { ";
+ Out << "return X != X || Y != Y; }\n";
+ Out << "static inline int llvm_fcmp_ueq(double X, double Y) { ";
+ Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_une(double X, double Y) { ";
+ Out << "return X != Y; }\n";
+ Out << "static inline int llvm_fcmp_ult(double X, double Y) { ";
+ Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_ugt(double X, double Y) { ";
+ Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_ule(double X, double Y) { ";
+ Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_uge(double X, double Y) { ";
+ Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_oeq(double X, double Y) { ";
+ Out << "return X == Y ; }\n";
+ Out << "static inline int llvm_fcmp_one(double X, double Y) { ";
+ Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n";
+ Out << "static inline int llvm_fcmp_olt(double X, double Y) { ";
+ Out << "return X < Y ; }\n";
+ Out << "static inline int llvm_fcmp_ogt(double X, double Y) { ";
+ Out << "return X > Y ; }\n";
+ Out << "static inline int llvm_fcmp_ole(double X, double Y) { ";
+ Out << "return X <= Y ; }\n";
+ Out << "static inline int llvm_fcmp_oge(double X, double Y) { ";
+ Out << "return X >= Y ; }\n";
+ return false;
+}
+
+
+/// Output all floating point constants that cannot be printed accurately...
+void CWriter::printFloatingPointConstants(Function &F) {
+ // Scan the module for floating point constants. If any FP constant is used
+ // in the function, we want to redirect it here so that we do not depend on
+ // the precision of the printed form, unless the printed form preserves
+ // precision.
+ //
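+ // For example (illustrative), a use of the double constant 0.1 is redirected
+ // to a definition such as
+ // static const ConstantDoubleTy FPConstant0 = 0x3FB999999999999AULL; /* 0.1 */
+ // and the use site reads it back as *(double*)&FPConstant0.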
+ for (constant_iterator I = constant_begin(&F), E = constant_end(&F);
+ I != E; ++I)
+ printFloatingPointConstants(*I);
+
+ Out << '\n';
+}
+
+void CWriter::printFloatingPointConstants(const Constant *C) {
+ // If this is a constant expression, recursively check for constant fp values.
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
+ printFloatingPointConstants(CE->getOperand(i));
+ return;
+ }
+
+ // Otherwise, check for a FP constant that we need to print.
+ const ConstantFP *FPC = dyn_cast<ConstantFP>(C);
+ if (FPC == 0 ||
+ // Do not put in FPConstantMap if safe.
+ isFPCSafeToPrint(FPC) ||
+ // Already printed this constant?
+ FPConstantMap.count(FPC))
+ return;
+
+ FPConstantMap[FPC] = FPCounter; // Number the FP constants
+
+ if (FPC->getType() == Type::DoubleTy) {
+ double Val = FPC->getValueAPF().convertToDouble();
+ uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
+ Out << "static const ConstantDoubleTy FPConstant" << FPCounter++
+ << " = 0x" << utohexstr(i)
+ << "ULL; /* " << Val << " */\n";
+ } else if (FPC->getType() == Type::FloatTy) {
+ float Val = FPC->getValueAPF().convertToFloat();
+ uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt().
+ getZExtValue();
+ Out << "static const ConstantFloatTy FPConstant" << FPCounter++
+ << " = 0x" << utohexstr(i)
+ << "U; /* " << Val << " */\n";
+ } else if (FPC->getType() == Type::X86_FP80Ty) {
+ // api needed to prevent premature destruction
+ APInt api = FPC->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Out << "static const ConstantFP80Ty FPConstant" << FPCounter++
+ << " = { 0x" << utohexstr(p[0])
+ << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}"
+ << "}; /* Long double constant */\n";
+ } else if (FPC->getType() == Type::PPC_FP128Ty) {
+ APInt api = FPC->getValueAPF().bitcastToAPInt();
+ const uint64_t *p = api.getRawData();
+ Out << "static const ConstantFP128Ty FPConstant" << FPCounter++
+ << " = { 0x"
+ << utohexstr(p[0]) << ", 0x" << utohexstr(p[1])
+ << "}; /* Long double constant */\n";
+
+ } else {
+ assert(0 && "Unknown float type!");
+ }
+}
+
+
+
+/// printModuleTypes - Run through the type symbol table looking for type
+/// names. If a type name is found, emit its declaration...
+///
+void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
+ Out << "/* Helper union for bitcasts */\n";
+ Out << "typedef union {\n";
+ Out << " unsigned int Int32;\n";
+ Out << " unsigned long long Int64;\n";
+ Out << " float Float;\n";
+ Out << " double Double;\n";
+ Out << "} llvmBitCastUnion;\n";
+
+ // We are only interested in the type plane of the symbol table.
+ TypeSymbolTable::const_iterator I = TST.begin();
+ TypeSymbolTable::const_iterator End = TST.end();
+
+ // If there are no type names, exit early.
+ if (I == End) return;
+
+ // Print out forward declarations for structure types before anything else!
+ Out << "/* Structure forward decls */\n";
+ for (; I != End; ++I) {
+ std::string Name = "struct l_" + Mang->makeNameProper(I->first);
+ Out << Name << ";\n";
+ TypeNames.insert(std::make_pair(I->second, Name));
+ }
+
+ Out << '\n';
+
+ // Now we can print out typedefs. Above, we guaranteed that this can only be
+ // for struct or opaque types.
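+ // For example (illustrative), a symbol-table entry named "foo" produces
+ // "typedef struct l_foo l_foo;", reusing the forward declaration emitted
+ // above as the printed name of the type.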
+ Out << "/* Typedefs */\n";
+ for (I = TST.begin(); I != End; ++I) {
+ std::string Name = "l_" + Mang->makeNameProper(I->first);
+ Out << "typedef ";
+ printType(Out, I->second, false, Name);
+ Out << ";\n";
+ }
+
+ Out << '\n';
+
+ // Keep track of which structures have been printed so far...
+ std::set<const Type *> StructPrinted;
+
+ // Loop over all structures then push them into the stack so they are
+ // printed in the correct order.
+ //
+ Out << "/* Structure contents */\n";
+ for (I = TST.begin(); I != End; ++I)
+ if (isa<StructType>(I->second) || isa<ArrayType>(I->second))
+ // Only print out used types!
+ printContainedStructs(I->second, StructPrinted);
+}
+
+// Push the struct onto the stack and recursively push all structs
+// this one depends on.
+//
+// TODO: Make this work properly with vector types
+//
+void CWriter::printContainedStructs(const Type *Ty,
+ std::set<const Type*> &StructPrinted) {
+ // Don't walk through pointers.
+ if (isa<PointerType>(Ty) || Ty->isPrimitiveType() || Ty->isInteger()) return;
+
+ // Print all contained types first.
+ for (Type::subtype_iterator I = Ty->subtype_begin(),
+ E = Ty->subtype_end(); I != E; ++I)
+ printContainedStructs(*I, StructPrinted);
+
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) {
+ // Check to see if we have already printed this struct.
+ if (StructPrinted.insert(Ty).second) {
+ // Print structure type out.
+ std::string Name = TypeNames[Ty];
+ printType(Out, Ty, false, Name, true);
+ Out << ";\n\n";
+ }
+ }
+}
+
+void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
+ /// isStructReturn - Should this function actually return a struct by-value?
+ bool isStructReturn = F->hasStructRetAttr();
+
+ if (F->hasLocalLinkage()) Out << "static ";
+ if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) ";
+ if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Out << "__attribute__((stdcall)) ";
+ break;
+ case CallingConv::X86_FastCall:
+ Out << "__attribute__((fastcall)) ";
+ break;
+ }
+
+ // Loop over the arguments, printing them...
+ const FunctionType *FT = cast<FunctionType>(F->getFunctionType());
+ const AttrListPtr &PAL = F->getAttributes();
+
+ std::stringstream FunctionInnards;
+
+ // Print out the name...
+ FunctionInnards << GetValueName(F) << '(';
+
+ bool PrintedArg = false;
+ if (!F->isDeclaration()) {
+ if (!F->arg_empty()) {
+ Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+ unsigned Idx = 1;
+
+ // If this is a struct-return function, don't print the hidden
+ // struct-return argument.
+ if (isStructReturn) {
+ assert(I != E && "Invalid struct return function!");
+ ++I;
+ ++Idx;
+ }
+
+ std::string ArgName;
+ for (; I != E; ++I) {
+ if (PrintedArg) FunctionInnards << ", ";
+ if (I->hasName() || !Prototype)
+ ArgName = GetValueName(I);
+ else
+ ArgName = "";
+ const Type *ArgTy = I->getType();
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ ByValParams.insert(I);
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt),
+ ArgName);
+ PrintedArg = true;
+ ++Idx;
+ }
+ }
+ } else {
+ // Loop over the arguments, printing them.
+ FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end();
+ unsigned Idx = 1;
+
+ // If this is a struct-return function, don't print the hidden
+ // struct-return argument.
+ if (isStructReturn) {
+ assert(I != E && "Invalid struct return function!");
+ ++I;
+ ++Idx;
+ }
+
+ for (; I != E; ++I) {
+ if (PrintedArg) FunctionInnards << ", ";
+ const Type *ArgTy = *I;
+ if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
+ assert(isa<PointerType>(ArgTy));
+ ArgTy = cast<PointerType>(ArgTy)->getElementType();
+ }
+ printType(FunctionInnards, ArgTy,
+ /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt));
+ PrintedArg = true;
+ ++Idx;
+ }
+ }
+
+ // Finish printing arguments... if this is a vararg function, print the ...,
+ // unless there are no known types, in which case, we just emit ().
+ //
+ if (FT->isVarArg() && PrintedArg) {
+ FunctionInnards << ", ";
+ FunctionInnards << "..."; // Output varargs portion of signature!
+ } else if (!FT->isVarArg() && !PrintedArg) {
+ FunctionInnards << "void"; // ret() -> ret(void) in C.
+ }
+ FunctionInnards << ')';
+
+ // Get the return type for the function.
+ const Type *RetTy;
+ if (!isStructReturn)
+ RetTy = F->getReturnType();
+ else {
+ // If this is a struct-return function, print the struct-return type.
+ RetTy = cast<PointerType>(FT->getParamType(0))->getElementType();
+ }
+
+ // Print out the return type and the signature built above.
+ printType(Out, RetTy,
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt),
+ FunctionInnards.str());
+}
+
+static inline bool isFPIntBitCast(const Instruction &I) {
+ if (!isa<BitCastInst>(I))
+ return false;
+ const Type *SrcTy = I.getOperand(0)->getType();
+ const Type *DstTy = I.getType();
+ return (SrcTy->isFloatingPoint() && DstTy->isInteger()) ||
+ (DstTy->isFloatingPoint() && SrcTy->isInteger());
+}
+
+void CWriter::printFunction(Function &F) {
+ /// isStructReturn - Should this function actually return a struct by-value?
+ bool isStructReturn = F.hasStructRetAttr();
+
+ printFunctionSignature(&F, false);
+ Out << " {\n";
+
+ // If this is a struct return function, handle the result with magic.
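+ // The emitted C looks roughly like this (illustrative, for a returned
+ // struct type S and a hypothetical argument name):
+ // struct l_S StructReturn; /* Struct return temporary */
+ // struct l_S *llvm_cbe_agg_result = &StructReturn;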
+ if (isStructReturn) {
+ const Type *StructTy =
+ cast<PointerType>(F.arg_begin()->getType())->getElementType();
+ Out << " ";
+ printType(Out, StructTy, false, "StructReturn");
+ Out << "; /* Struct return temporary */\n";
+
+ Out << " ";
+ printType(Out, F.arg_begin()->getType(), false,
+ GetValueName(F.arg_begin()));
+ Out << " = &StructReturn;\n";
+ }
+
+ bool PrintedVar = false;
+
+ // print local variable information for the function
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ if (const AllocaInst *AI = isDirectAlloca(&*I)) {
+ Out << " ";
+ printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
+ Out << "; /* Address-exposed local */\n";
+ PrintedVar = true;
+ } else if (I->getType() != Type::VoidTy && !isInlinableInst(*I)) {
+ Out << " ";
+ printType(Out, I->getType(), false, GetValueName(&*I));
+ Out << ";\n";
+
+ if (isa<PHINode>(*I)) { // Print out PHI node temporaries as well...
+ Out << " ";
+ printType(Out, I->getType(), false,
+ GetValueName(&*I)+"__PHI_TEMPORARY");
+ Out << ";\n";
+ }
+ PrintedVar = true;
+ }
+ // We need a temporary for the BitCast to use so it can pluck a value out
+ // of a union to do the BitCast. This is separate from the need for a
+ // variable to hold the result of the BitCast.
+ if (isFPIntBitCast(*I)) {
+ Out << " llvmBitCastUnion " << GetValueName(&*I)
+ << "__BITCAST_TEMPORARY;\n";
+ PrintedVar = true;
+ }
+ }
+
+ if (PrintedVar)
+ Out << '\n';
+
+ if (F.hasExternalLinkage() && F.getName() == "main")
+ Out << " CODE_FOR_MAIN();\n";
+
+ // print the basic blocks
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Loop *L = LI->getLoopFor(BB)) {
+ if (L->getHeader() == BB && L->getParentLoop() == 0)
+ printLoop(L);
+ } else {
+ printBasicBlock(BB);
+ }
+ }
+
+ Out << "}\n\n";
+}
+
+void CWriter::printLoop(Loop *L) {
+ Out << " do { /* Syntactic loop '" << L->getHeader()->getName()
+ << "' to make GCC happy */\n";
+ for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) {
+ BasicBlock *BB = L->getBlocks()[i];
+ Loop *BBLoop = LI->getLoopFor(BB);
+ if (BBLoop == L)
+ printBasicBlock(BB);
+ else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L)
+ printLoop(BBLoop);
+ }
+ Out << " } while (1); /* end of syntactic loop '"
+ << L->getHeader()->getName() << "' */\n";
+}
+
+void CWriter::printBasicBlock(BasicBlock *BB) {
+
+ // Don't print the label for the basic block if there are no uses, or if
+ // the only terminator use is the predecessor basic block's terminator.
+ // We have to scan the use list because PHI nodes use basic blocks too but
+ // do not require a label to be generated.
+ //
+ bool NeedsLabel = false;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ if (isGotoCodeNecessary(*PI, BB)) {
+ NeedsLabel = true;
+ break;
+ }
+
+ if (NeedsLabel) Out << GetValueName(BB) << ":\n";
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E;
+ ++II) {
+ if (!isInlinableInst(*II) && !isDirectAlloca(II)) {
+ if (II->getType() != Type::VoidTy && !isInlineAsm(*II))
+ outputLValue(II);
+ else
+ Out << " ";
+ writeInstComputationInline(*II);
+ Out << ";\n";
+ }
+ }
+
+ // Don't emit prefix or suffix for the terminator.
+ visit(*BB->getTerminator());
+}
+
+
+// Specific Instruction type classes... note that all of the casts are
+// necessary because we use the instruction classes as opaque types...
+//
+void CWriter::visitReturnInst(ReturnInst &I) {
+ // If this is a struct return function, return the temporary struct.
+ bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr();
+
+ if (isStructReturn) {
+ Out << " return StructReturn;\n";
+ return;
+ }
+
+ // Don't output a void return if this is the last basic block in the function
+ if (I.getNumOperands() == 0 &&
+ &*--I.getParent()->getParent()->end() == I.getParent() &&
+      I.getParent()->size() != 1) {
+ return;
+ }
+
+ if (I.getNumOperands() > 1) {
+ Out << " {\n";
+ Out << " ";
+ printType(Out, I.getParent()->getParent()->getReturnType());
+ Out << " llvm_cbe_mrv_temp = {\n";
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ Out << " ";
+ writeOperand(I.getOperand(i));
+ if (i != e - 1)
+ Out << ",";
+ Out << "\n";
+ }
+ Out << " };\n";
+ Out << " return llvm_cbe_mrv_temp;\n";
+ Out << " }\n";
+ return;
+ }
+
+ Out << " return";
+ if (I.getNumOperands()) {
+ Out << ' ';
+ writeOperand(I.getOperand(0));
+ }
+ Out << ";\n";
+}
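+// Illustrative only (hypothetical names): a multiple-value return such as
+// "ret i32 %a, i32 %b" is emitted by the block above roughly as
+//   { struct l_rettype llvm_cbe_mrv_temp = { llvm_cbe_a, llvm_cbe_b };
+//     return llvm_cbe_mrv_temp; }
+// while the common single-operand case is just "return llvm_cbe_a;".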
+
+void CWriter::visitSwitchInst(SwitchInst &SI) {
+
+ Out << " switch (";
+ writeOperand(SI.getOperand(0));
+ Out << ") {\n default:\n";
+ printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2);
+ printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
+ Out << ";\n";
+ for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) {
+ Out << " case ";
+ writeOperand(SI.getOperand(i));
+ Out << ":\n";
+ BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1));
+ printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
+ printBranchToBlock(SI.getParent(), Succ, 2);
+ if (Function::iterator(Succ) == next(Function::iterator(SI.getParent())))
+ Out << " break;\n";
+ }
+ Out << " }\n";
+}
+
+void CWriter::visitUnreachableInst(UnreachableInst &I) {
+ Out << " /*UNREACHABLE*/;\n";
+}
+
+bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) {
+  /// FIXME: This should be re-enabled, but only once it is made safe with
+  /// respect to loop reordering. For now, always emit the goto.
+ return true;
+
+ if (next(Function::iterator(From)) != Function::iterator(To))
+ return true; // Not the direct successor, we need a goto.
+
+ //isa<SwitchInst>(From->getTerminator())
+
+ if (LI->getLoopFor(From) != LI->getLoopFor(To))
+ return true;
+ return false;
+}
+
+void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock,
+ BasicBlock *Successor,
+ unsigned Indent) {
+ for (BasicBlock::iterator I = Successor->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ // Now we have to do the printing.
+ Value *IV = PN->getIncomingValueForBlock(CurBlock);
+ if (!isa<UndefValue>(IV)) {
+ Out << std::string(Indent, ' ');
+ Out << " " << GetValueName(I) << "__PHI_TEMPORARY = ";
+ writeOperand(IV);
+ Out << "; /* for PHI node */\n";
+ }
+ }
+}
+
+void CWriter::printBranchToBlock(BasicBlock *CurBB, BasicBlock *Succ,
+ unsigned Indent) {
+ if (isGotoCodeNecessary(CurBB, Succ)) {
+ Out << std::string(Indent, ' ') << " goto ";
+ writeOperand(Succ);
+ Out << ";\n";
+ }
+}
+
+// Branch instruction printing - Avoid printing out a branch to a basic block
+// that immediately succeeds the current one.
+//
+void CWriter::visitBranchInst(BranchInst &I) {
+
+ if (I.isConditional()) {
+ if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) {
+ Out << " if (";
+ writeOperand(I.getCondition());
+ Out << ") {\n";
+
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(0), 2);
+
+ if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) {
+ Out << " } else {\n";
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
+ }
+ } else {
+ // First goto not necessary, assume second one is...
+ Out << " if (!";
+ writeOperand(I.getCondition());
+ Out << ") {\n";
+
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
+ printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
+ }
+
+ Out << " }\n";
+ } else {
+ printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 0);
+ printBranchToBlock(I.getParent(), I.getSuccessor(0), 0);
+ }
+ Out << "\n";
+}
+
+// PHI nodes get copied into temporary values at the end of predecessor basic
+// blocks. We now need to copy these temporary values into the REAL value for
+// the PHI.
+void CWriter::visitPHINode(PHINode &I) {
+ writeOperand(&I);
+ Out << "__PHI_TEMPORARY";
+}
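+// Illustrative only (hypothetical names): together with
+// printPHICopiesForSuccessor above, "%x = phi i32 [%a, %bb1], [%b, %bb2]"
+// ends up as
+//   llvm_cbe_x__PHI_TEMPORARY = llvm_cbe_a;  /* for PHI node */  (in bb1)
+//   llvm_cbe_x__PHI_TEMPORARY = llvm_cbe_b;  /* for PHI node */  (in bb2)
+//   llvm_cbe_x = llvm_cbe_x__PHI_TEMPORARY;  (at the PHI itself)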
+
+
+void CWriter::visitBinaryOperator(Instruction &I) {
+ // binary instructions, shift instructions, setCond instructions.
+ assert(!isa<PointerType>(I.getType()));
+
+ // We must cast the results of binary operations which might be promoted.
+ bool needsCast = false;
+ if ((I.getType() == Type::Int8Ty) || (I.getType() == Type::Int16Ty)
+ || (I.getType() == Type::FloatTy)) {
+ needsCast = true;
+ Out << "((";
+ printType(Out, I.getType(), false);
+ Out << ")(";
+ }
+
+ // If this is a negation operation, print it out as such. For FP, we don't
+ // want to print "-0.0 - X".
+ if (BinaryOperator::isNeg(&I)) {
+ Out << "-(";
+ writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I)));
+ Out << ")";
+ } else if (I.getOpcode() == Instruction::FRem) {
+ // Output a call to fmod/fmodf instead of emitting a%b
+ if (I.getType() == Type::FloatTy)
+ Out << "fmodf(";
+ else if (I.getType() == Type::DoubleTy)
+ Out << "fmod(";
+ else // all 3 flavors of long double
+ Out << "fmodl(";
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ writeOperand(I.getOperand(1));
+ Out << ")";
+ } else {
+
+ // Write out the cast of the instruction's value back to the proper type
+ // if necessary.
+ bool NeedsClosingParens = writeInstructionCast(I);
+
+ // Certain instructions require the operand to be forced to a specific type
+ // so we use writeOperandWithCast here instead of writeOperand. Similarly
+ // below for operand 1
+ writeOperandWithCast(I.getOperand(0), I.getOpcode());
+
+ switch (I.getOpcode()) {
+ case Instruction::Add: Out << " + "; break;
+ case Instruction::Sub: Out << " - "; break;
+ case Instruction::Mul: Out << " * "; break;
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem: Out << " % "; break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv: Out << " / "; break;
+ case Instruction::And: Out << " & "; break;
+ case Instruction::Or: Out << " | "; break;
+ case Instruction::Xor: Out << " ^ "; break;
+ case Instruction::Shl : Out << " << "; break;
+ case Instruction::LShr:
+ case Instruction::AShr: Out << " >> "; break;
+ default: cerr << "Invalid operator type!" << I; abort();
+ }
+
+ writeOperandWithCast(I.getOperand(1), I.getOpcode());
+ if (NeedsClosingParens)
+ Out << "))";
+ }
+
+ if (needsCast) {
+ Out << "))";
+ }
+}
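+// Illustrative only (hypothetical names): because i8/i16/float arithmetic may
+// be promoted by C, "add i8 %a, %b" is printed with the needsCast wrapper
+// above, roughly
+//   ((unsigned char)(llvm_cbe_a + llvm_cbe_b))
+// so the result is truncated back to the original width.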
+
+void CWriter::visitICmpInst(ICmpInst &I) {
+ // We must cast the results of icmp which might be promoted.
+ bool needsCast = false;
+
+ // Write out the cast of the instruction's value back to the proper type
+ // if necessary.
+ bool NeedsClosingParens = writeInstructionCast(I);
+
+  // Certain icmp predicates require the operand to be forced to a specific type
+ // so we use writeOperandWithCast here instead of writeOperand. Similarly
+ // below for operand 1
+ writeOperandWithCast(I.getOperand(0), I);
+
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << " == "; break;
+ case ICmpInst::ICMP_NE: Out << " != "; break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE: Out << " <= "; break;
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE: Out << " >= "; break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: Out << " < "; break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT: Out << " > "; break;
+ default: cerr << "Invalid icmp predicate!" << I; abort();
+ }
+
+ writeOperandWithCast(I.getOperand(1), I);
+ if (NeedsClosingParens)
+ Out << "))";
+
+ if (needsCast) {
+ Out << "))";
+ }
+}
+
+void CWriter::visitFCmpInst(FCmpInst &I) {
+ if (I.getPredicate() == FCmpInst::FCMP_FALSE) {
+ Out << "0";
+ return;
+ }
+ if (I.getPredicate() == FCmpInst::FCMP_TRUE) {
+ Out << "1";
+ return;
+ }
+
+ const char* op = 0;
+ switch (I.getPredicate()) {
+ default: assert(0 && "Illegal FCmp predicate");
+ case FCmpInst::FCMP_ORD: op = "ord"; break;
+ case FCmpInst::FCMP_UNO: op = "uno"; break;
+ case FCmpInst::FCMP_UEQ: op = "ueq"; break;
+ case FCmpInst::FCMP_UNE: op = "une"; break;
+ case FCmpInst::FCMP_ULT: op = "ult"; break;
+ case FCmpInst::FCMP_ULE: op = "ule"; break;
+ case FCmpInst::FCMP_UGT: op = "ugt"; break;
+ case FCmpInst::FCMP_UGE: op = "uge"; break;
+ case FCmpInst::FCMP_OEQ: op = "oeq"; break;
+ case FCmpInst::FCMP_ONE: op = "one"; break;
+ case FCmpInst::FCMP_OLT: op = "olt"; break;
+ case FCmpInst::FCMP_OLE: op = "ole"; break;
+ case FCmpInst::FCMP_OGT: op = "ogt"; break;
+ case FCmpInst::FCMP_OGE: op = "oge"; break;
+ }
+
+ Out << "llvm_fcmp_" << op << "(";
+ // Write the first operand
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ // Write the second operand
+ writeOperand(I.getOperand(1));
+ Out << ")";
+}
+
+static const char * getFloatBitCastField(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ default: assert(0 && "Invalid Type");
+ case Type::FloatTyID: return "Float";
+ case Type::DoubleTyID: return "Double";
+ case Type::IntegerTyID: {
+ unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
+ if (NumBits <= 32)
+ return "Int32";
+ else
+ return "Int64";
+ }
+ }
+}
+
+void CWriter::visitCastInst(CastInst &I) {
+ const Type *DstTy = I.getType();
+ const Type *SrcTy = I.getOperand(0)->getType();
+ if (isFPIntBitCast(I)) {
+ Out << '(';
+ // These int<->float and long<->double casts need to be handled specially
+ Out << GetValueName(&I) << "__BITCAST_TEMPORARY."
+ << getFloatBitCastField(I.getOperand(0)->getType()) << " = ";
+ writeOperand(I.getOperand(0));
+ Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY."
+ << getFloatBitCastField(I.getType());
+ Out << ')';
+ return;
+ }
+
+ Out << '(';
+ printCast(I.getOpcode(), SrcTy, DstTy);
+
+ // Make a sext from i1 work by subtracting the i1 from 0 (an int).
+ if (SrcTy == Type::Int1Ty && I.getOpcode() == Instruction::SExt)
+ Out << "0-";
+
+ writeOperand(I.getOperand(0));
+
+ if (DstTy == Type::Int1Ty &&
+ (I.getOpcode() == Instruction::Trunc ||
+ I.getOpcode() == Instruction::FPToUI ||
+ I.getOpcode() == Instruction::FPToSI ||
+ I.getOpcode() == Instruction::PtrToInt)) {
+ // Make sure we really get a trunc to bool by anding the operand with 1
+ Out << "&1u";
+ }
+ Out << ')';
+}
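+// Illustrative only (hypothetical names): "bitcast float %f to i32" uses the
+// llvmBitCastUnion temporary declared with the function's other locals above,
+// roughly
+//   (llvm_cbe_x__BITCAST_TEMPORARY.Float = llvm_cbe_f,
+//    llvm_cbe_x__BITCAST_TEMPORARY.Int32)
+// i.e. the bits are stored into one union member and read back out of the
+// other; every other cast goes through the plain C cast printed above.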
+
+void CWriter::visitSelectInst(SelectInst &I) {
+ Out << "((";
+ writeOperand(I.getCondition());
+ Out << ") ? (";
+ writeOperand(I.getTrueValue());
+ Out << ") : (";
+ writeOperand(I.getFalseValue());
+ Out << "))";
+}
+
+
+void CWriter::lowerIntrinsics(Function &F) {
+ // This is used to keep track of intrinsics that get generated to a lowered
+ // function. We must generate the prototypes before the function body which
+ // will only be expanded on first use (by the loop below).
+ std::vector<Function*> prototypesToGen;
+
+ // Examine all the instructions in this function to find the intrinsics that
+ // need to be lowered.
+ for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
+ if (CallInst *CI = dyn_cast<CallInst>(I++))
+ if (Function *F = CI->getCalledFunction())
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ case Intrinsic::memory_barrier:
+ case Intrinsic::vastart:
+ case Intrinsic::vacopy:
+ case Intrinsic::vaend:
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ case Intrinsic::setjmp:
+ case Intrinsic::longjmp:
+ case Intrinsic::prefetch:
+ case Intrinsic::dbg_stoppoint:
+ case Intrinsic::powi:
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse_cmp_ps:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse2_cmp_pd:
+ case Intrinsic::ppc_altivec_lvsl:
+ // We directly implement these intrinsics
+ break;
+ default:
+ // If this is an intrinsic that directly corresponds to a GCC
+ // builtin, we handle it.
+ const char *BuiltinName = "";
+#define GET_GCC_BUILTIN_NAME
+#include "llvm/Intrinsics.gen"
+#undef GET_GCC_BUILTIN_NAME
+ // If we handle it, don't lower it.
+ if (BuiltinName[0]) break;
+
+ // All other intrinsic calls we must lower.
+ Instruction *Before = 0;
+ if (CI != &BB->front())
+ Before = prior(BasicBlock::iterator(CI));
+
+ IL->LowerIntrinsicCall(CI);
+ if (Before) { // Move iterator to instruction after call
+ I = Before; ++I;
+ } else {
+ I = BB->begin();
+ }
+ // If the intrinsic got lowered to another call, and that call has
+ // a definition then we need to make sure its prototype is emitted
+ // before any calls to it.
+ if (CallInst *Call = dyn_cast<CallInst>(I))
+ if (Function *NewF = Call->getCalledFunction())
+ if (!NewF->isDeclaration())
+ prototypesToGen.push_back(NewF);
+
+ break;
+ }
+
+ // We may have collected some prototypes to emit in the loop above.
+ // Emit them now, before the function that uses them is emitted. But,
+ // be careful not to emit them twice.
+ std::vector<Function*>::iterator I = prototypesToGen.begin();
+ std::vector<Function*>::iterator E = prototypesToGen.end();
+ for ( ; I != E; ++I) {
+ if (intrinsicPrototypesAlreadyGenerated.insert(*I).second) {
+ Out << '\n';
+ printFunctionSignature(*I, true);
+ Out << ";\n";
+ }
+ }
+}
+
+void CWriter::visitCallInst(CallInst &I) {
+ if (isa<InlineAsm>(I.getOperand(0)))
+ return visitInlineAsm(I);
+
+ bool WroteCallee = false;
+
+ // Handle intrinsic function calls first...
+ if (Function *F = I.getCalledFunction())
+ if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
+ if (visitBuiltinCall(I, ID, WroteCallee))
+ return;
+
+ Value *Callee = I.getCalledValue();
+
+ const PointerType *PTy = cast<PointerType>(Callee->getType());
+ const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+ // If this is a call to a struct-return function, assign to the first
+ // parameter instead of passing it to the call.
+ const AttrListPtr &PAL = I.getAttributes();
+ bool hasByVal = I.hasByValArgument();
+ bool isStructRet = I.hasStructRetAttr();
+ if (isStructRet) {
+ writeOperandDeref(I.getOperand(1));
+ Out << " = ";
+ }
+
+ if (I.isTailCall()) Out << " /*tail*/ ";
+
+ if (!WroteCallee) {
+ // If this is an indirect call to a struct return function, we need to cast
+ // the pointer. Ditto for indirect calls with byval arguments.
+ bool NeedsCast = (hasByVal || isStructRet) && !isa<Function>(Callee);
+
+ // GCC is a real PITA. It does not permit codegening casts of functions to
+ // function pointers if they are in a call (it generates a trap instruction
+ // instead!). We work around this by inserting a cast to void* in between
+ // the function and the function pointer cast. Unfortunately, we can't just
+ // form the constant expression here, because the folder will immediately
+ // nuke it.
+ //
+ // Note finally, that this is completely unsafe. ANSI C does not guarantee
+ // that void* and function pointers have the same size. :( To deal with this
+ // in the common case, we handle casts where the number of arguments passed
+ // match exactly.
+ //
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Callee))
+ if (CE->isCast())
+ if (Function *RF = dyn_cast<Function>(CE->getOperand(0))) {
+ NeedsCast = true;
+ Callee = RF;
+ }
+
+ if (NeedsCast) {
+ // Ok, just cast the pointer type.
+ Out << "((";
+ if (isStructRet)
+ printStructReturnPointerFunctionType(Out, PAL,
+ cast<PointerType>(I.getCalledValue()->getType()));
+ else if (hasByVal)
+ printType(Out, I.getCalledValue()->getType(), false, "", true, PAL);
+ else
+ printType(Out, I.getCalledValue()->getType());
+ Out << ")(void*)";
+ }
+ writeOperand(Callee);
+ if (NeedsCast) Out << ')';
+ }
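+  // Illustrative only (hypothetical names): an indirect sret call ends up
+  // looking roughly like
+  //   *llvm_cbe_dest = ((struct l_T (*)(unsigned int))(void*)llvm_cbe_fp)(42);
+  // where the extra (void*) hop exists only to keep GCC from turning the
+  // function-pointer cast into a trap, as explained above.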
+
+ Out << '(';
+
+ unsigned NumDeclaredParams = FTy->getNumParams();
+
+ CallSite::arg_iterator AI = I.op_begin()+1, AE = I.op_end();
+ unsigned ArgNo = 0;
+ if (isStructRet) { // Skip struct return argument.
+ ++AI;
+ ++ArgNo;
+ }
+
+ bool PrintedArg = false;
+ for (; AI != AE; ++AI, ++ArgNo) {
+ if (PrintedArg) Out << ", ";
+ if (ArgNo < NumDeclaredParams &&
+ (*AI)->getType() != FTy->getParamType(ArgNo)) {
+ Out << '(';
+ printType(Out, FTy->getParamType(ArgNo),
+ /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt));
+ Out << ')';
+ }
+ // Check if the argument is expected to be passed by value.
+ if (I.paramHasAttr(ArgNo+1, Attribute::ByVal))
+ writeOperandDeref(*AI);
+ else
+ writeOperand(*AI);
+ PrintedArg = true;
+ }
+ Out << ')';
+}
+
+/// visitBuiltinCall - Handle the call to the specified builtin. Returns true
+/// if the entire call is handled, returns false if it wasn't handled, and
+/// optionally set 'WroteCallee' if the callee has already been printed out.
+bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
+ bool &WroteCallee) {
+ switch (ID) {
+ default: {
+ // If this is an intrinsic that directly corresponds to a GCC
+ // builtin, we emit it here.
+ const char *BuiltinName = "";
+ Function *F = I.getCalledFunction();
+#define GET_GCC_BUILTIN_NAME
+#include "llvm/Intrinsics.gen"
+#undef GET_GCC_BUILTIN_NAME
+ assert(BuiltinName[0] && "Unknown LLVM intrinsic!");
+
+ Out << BuiltinName;
+ WroteCallee = true;
+ return false;
+ }
+ case Intrinsic::memory_barrier:
+ Out << "__sync_synchronize()";
+ return true;
+ case Intrinsic::vastart:
+ Out << "0; ";
+
+ Out << "va_start(*(va_list*)";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ // Output the last argument to the enclosing function.
+ if (I.getParent()->getParent()->arg_empty()) {
+ cerr << "The C backend does not currently support zero "
+ << "argument varargs functions, such as '"
+ << I.getParent()->getParent()->getName() << "'!\n";
+ abort();
+ }
+ writeOperand(--I.getParent()->getParent()->arg_end());
+ Out << ')';
+ return true;
+ case Intrinsic::vaend:
+ if (!isa<ConstantPointerNull>(I.getOperand(1))) {
+ Out << "0; va_end(*(va_list*)";
+ writeOperand(I.getOperand(1));
+ Out << ')';
+ } else {
+ Out << "va_end(*(va_list*)0)";
+ }
+ return true;
+ case Intrinsic::vacopy:
+ Out << "0; ";
+ Out << "va_copy(*(va_list*)";
+ writeOperand(I.getOperand(1));
+ Out << ", *(va_list*)";
+ writeOperand(I.getOperand(2));
+ Out << ')';
+ return true;
+ case Intrinsic::returnaddress:
+ Out << "__builtin_return_address(";
+ writeOperand(I.getOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::frameaddress:
+ Out << "__builtin_frame_address(";
+ writeOperand(I.getOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::powi:
+ Out << "__builtin_powi(";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ writeOperand(I.getOperand(2));
+ Out << ')';
+ return true;
+ case Intrinsic::setjmp:
+ Out << "setjmp(*(jmp_buf*)";
+ writeOperand(I.getOperand(1));
+ Out << ')';
+ return true;
+ case Intrinsic::longjmp:
+ Out << "longjmp(*(jmp_buf*)";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ writeOperand(I.getOperand(2));
+ Out << ')';
+ return true;
+ case Intrinsic::prefetch:
+ Out << "LLVM_PREFETCH((const void *)";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ writeOperand(I.getOperand(2));
+ Out << ", ";
+ writeOperand(I.getOperand(3));
+ Out << ")";
+ return true;
+ case Intrinsic::stacksave:
+ // Emit this as: Val = 0; *((void**)&Val) = __builtin_stack_save()
+ // to work around GCC bugs (see PR1809).
+ Out << "0; *((void**)&" << GetValueName(&I)
+ << ") = __builtin_stack_save()";
+ return true;
+ case Intrinsic::dbg_stoppoint: {
+ // If we use writeOperand directly we get a "u" suffix which is rejected
+ // by gcc.
+ std::stringstream SPIStr;
+ DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
+ SPI.getDirectory()->print(SPIStr);
+ Out << "\n#line "
+ << SPI.getLine()
+ << " \"";
+ Out << SPIStr.str();
+ SPIStr.clear();
+ SPI.getFileName()->print(SPIStr);
+ Out << SPIStr.str() << "\"\n";
+ return true;
+ }
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse_cmp_ps:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse2_cmp_pd:
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ')';
+ // Multiple GCC builtins multiplex onto this intrinsic.
+ switch (cast<ConstantInt>(I.getOperand(3))->getZExtValue()) {
+ default: assert(0 && "Invalid llvm.x86.sse.cmp!");
+ case 0: Out << "__builtin_ia32_cmpeq"; break;
+ case 1: Out << "__builtin_ia32_cmplt"; break;
+ case 2: Out << "__builtin_ia32_cmple"; break;
+ case 3: Out << "__builtin_ia32_cmpunord"; break;
+ case 4: Out << "__builtin_ia32_cmpneq"; break;
+ case 5: Out << "__builtin_ia32_cmpnlt"; break;
+ case 6: Out << "__builtin_ia32_cmpnle"; break;
+ case 7: Out << "__builtin_ia32_cmpord"; break;
+ }
+ if (ID == Intrinsic::x86_sse_cmp_ps || ID == Intrinsic::x86_sse2_cmp_pd)
+ Out << 'p';
+ else
+ Out << 's';
+ if (ID == Intrinsic::x86_sse_cmp_ss || ID == Intrinsic::x86_sse_cmp_ps)
+ Out << 's';
+ else
+ Out << 'd';
+
+ Out << "(";
+ writeOperand(I.getOperand(1));
+ Out << ", ";
+ writeOperand(I.getOperand(2));
+ Out << ")";
+ return true;
+ case Intrinsic::ppc_altivec_lvsl:
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ')';
+ Out << "__builtin_altivec_lvsl(0, (void*)";
+ writeOperand(I.getOperand(1));
+ Out << ")";
+ return true;
+ }
+}
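+// Illustrative only: the SSE compare intrinsics above multiplex several GCC
+// builtins on their immediate predicate operand, e.g. llvm.x86.sse.cmp.ps
+// with predicate 1 prints a call to __builtin_ia32_cmpltps, while
+// llvm.x86.sse2.cmp.sd with predicate 4 prints __builtin_ia32_cmpneqsd.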
+
+// This converts the LLVM constraint string to something GCC is expecting.
+// TODO: work out platform-independent constraints and factor those out of the
+// per-target tables; also handle multiple constraint codes.
+std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
+
+ assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle");
+
+ const char *const *table = 0;
+
+ //Grab the translation table from TargetAsmInfo if it exists
+ if (!TAsm) {
+ std::string E;
+ const TargetMachineRegistry::entry* Match =
+ TargetMachineRegistry::getClosestStaticTargetForModule(*TheModule, E);
+ if (Match) {
+      // Per-platform TargetMachines don't exist, so create one here; this
+      // must be done only once.
+ const TargetMachine* TM = Match->CtorFn(*TheModule, "");
+ TAsm = TM->getTargetAsmInfo();
+ }
+ }
+ if (TAsm)
+ table = TAsm->getAsmCBE();
+
+ //Search the translation table if it exists
+ for (int i = 0; table && table[i]; i += 2)
+ if (c.Codes[0] == table[i])
+ return table[i+1];
+
+ //default is identity
+ return c.Codes[0];
+}
+
+//TODO: import logic from AsmPrinter.cpp
+static std::string gccifyAsm(std::string asmstr) {
+ for (std::string::size_type i = 0; i != asmstr.size(); ++i)
+ if (asmstr[i] == '\n')
+ asmstr.replace(i, 1, "\\n");
+ else if (asmstr[i] == '\t')
+ asmstr.replace(i, 1, "\\t");
+ else if (asmstr[i] == '$') {
+ if (asmstr[i + 1] == '{') {
+ std::string::size_type a = asmstr.find_first_of(':', i + 1);
+ std::string::size_type b = asmstr.find_first_of('}', i + 1);
+ std::string n = "%" +
+ asmstr.substr(a + 1, b - a - 1) +
+ asmstr.substr(i + 2, a - i - 2);
+ asmstr.replace(i, b - i + 1, n);
+ i += n.size() - 1;
+ } else
+ asmstr.replace(i, 1, "%");
+ }
+ else if (asmstr[i] == '%')//grr
+ { asmstr.replace(i, 1, "%%"); ++i;}
+
+ return asmstr;
+}
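+// Illustrative only (hypothetical asm string): gccifyAsm rewrites LLVM's
+// operand syntax into GCC's, so
+//   "mov ${0:w}, $1 ; 50%"  becomes  "mov %w0, %1 ; 50%%"
+// and embedded newlines/tabs are escaped as \n and \t.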
+
+// TODO: the assumptions about what consumes arguments from the call are likely
+// wrong; also handle commutativity.
+void CWriter::visitInlineAsm(CallInst &CI) {
+ InlineAsm* as = cast<InlineAsm>(CI.getOperand(0));
+ std::vector<InlineAsm::ConstraintInfo> Constraints = as->ParseConstraints();
+
+ std::vector<std::pair<Value*, int> > ResultVals;
+ if (CI.getType() == Type::VoidTy)
+ ;
+ else if (const StructType *ST = dyn_cast<StructType>(CI.getType())) {
+ for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
+ ResultVals.push_back(std::make_pair(&CI, (int)i));
+ } else {
+ ResultVals.push_back(std::make_pair(&CI, -1));
+ }
+
+ // Fix up the asm string for gcc and emit it.
+ Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n";
+ Out << " :";
+
+ unsigned ValueCount = 0;
+ bool IsFirst = true;
+
+ // Convert over all the output constraints.
+ for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+
+ if (I->Type != InlineAsm::isOutput) {
+ ++ValueCount;
+ continue; // Ignore non-output constraints.
+ }
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+    if (!IsFirst)
+      Out << ", ";
+    IsFirst = false;
+
+ // Unpack the dest.
+ Value *DestVal;
+ int DestValNo = -1;
+
+ if (ValueCount < ResultVals.size()) {
+ DestVal = ResultVals[ValueCount].first;
+ DestValNo = ResultVals[ValueCount].second;
+ } else
+ DestVal = CI.getOperand(ValueCount-ResultVals.size()+1);
+
+ if (I->isEarlyClobber)
+ C = "&"+C;
+
+ Out << "\"=" << C << "\"(" << GetValueName(DestVal);
+ if (DestValNo != -1)
+ Out << ".field" << DestValNo; // Multiple retvals.
+ Out << ")";
+ ++ValueCount;
+ }
+
+
+ // Convert over all the input constraints.
+ Out << "\n :";
+ IsFirst = true;
+ ValueCount = 0;
+ for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+ if (I->Type != InlineAsm::isInput) {
+ ++ValueCount;
+ continue; // Ignore non-input constraints.
+ }
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+    if (!IsFirst)
+      Out << ", ";
+    IsFirst = false;
+
+ assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
+ Value *SrcVal = CI.getOperand(ValueCount-ResultVals.size()+1);
+
+ Out << "\"" << C << "\"(";
+ if (!I->isIndirect)
+ writeOperand(SrcVal);
+ else
+ writeOperandDeref(SrcVal);
+ Out << ")";
+ }
+
+ // Convert over the clobber constraints.
+ IsFirst = true;
+ ValueCount = 0;
+ for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(),
+ E = Constraints.end(); I != E; ++I) {
+ if (I->Type != InlineAsm::isClobber)
+      continue; // Ignore non-clobber constraints.
+
+ assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
+ std::string C = InterpretASMConstraint(*I);
+ if (C.empty()) continue;
+
+    if (!IsFirst)
+      Out << ", ";
+    IsFirst = false;
+
+ Out << '\"' << C << '"';
+ }
+
+ Out << ")";
+}
+
+void CWriter::visitMallocInst(MallocInst &I) {
+ assert(0 && "lowerallocations pass didn't work!");
+}
+
+void CWriter::visitAllocaInst(AllocaInst &I) {
+ Out << '(';
+ printType(Out, I.getType());
+ Out << ") alloca(sizeof(";
+ printType(Out, I.getType()->getElementType());
+ Out << ')';
+ if (I.isArrayAllocation()) {
+ Out << " * " ;
+ writeOperand(I.getOperand(0));
+ }
+ Out << ')';
+}
+
+void CWriter::visitFreeInst(FreeInst &I) {
+ assert(0 && "lowerallocations pass didn't work!");
+}
+
+void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
+ gep_type_iterator E, bool Static) {
+
+ // If there are no indices, just print out the pointer.
+ if (I == E) {
+ writeOperand(Ptr);
+ return;
+ }
+
+ // Find out if the last index is into a vector. If so, we have to print this
+ // specially. Since vectors can't have elements of indexable type, only the
+ // last index could possibly be of a vector element.
+ const VectorType *LastIndexIsVector = 0;
+ {
+ for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
+ LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
+ }
+
+ Out << "(";
+
+ // If the last index is into a vector, we can't print it as &a[i][j] because
+ // we can't index into a vector with j in GCC. Instead, emit this as
+ // (((float*)&a[i])+j)
+ if (LastIndexIsVector) {
+ Out << "((";
+ printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType()));
+ Out << ")(";
+ }
+
+ Out << '&';
+
+ // If the first index is 0 (very typical) we can do a number of
+ // simplifications to clean up the code.
+ Value *FirstOp = I.getOperand();
+ if (!isa<Constant>(FirstOp) || !cast<Constant>(FirstOp)->isNullValue()) {
+ // First index isn't simple, print it the hard way.
+ writeOperand(Ptr);
+ } else {
+ ++I; // Skip the zero index.
+
+ // Okay, emit the first operand. If Ptr is something that is already address
+ // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead.
+ if (isAddressExposed(Ptr)) {
+ writeOperandInternal(Ptr, Static);
+ } else if (I != E && isa<StructType>(*I)) {
+ // If we didn't already emit the first operand, see if we can print it as
+ // P->f instead of "P[0].f"
+ writeOperand(Ptr);
+ Out << "->field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
+ ++I; // eat the struct index as well.
+ } else {
+ // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic.
+ Out << "(*";
+ writeOperand(Ptr);
+ Out << ")";
+ }
+ }
+
+ for (; I != E; ++I) {
+ if (isa<StructType>(*I)) {
+ Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
+ } else if (isa<ArrayType>(*I)) {
+ Out << ".array[";
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << ']';
+ } else if (!isa<VectorType>(*I)) {
+ Out << '[';
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << ']';
+ } else {
+ // If the last index is into a vector, then print it out as "+j)". This
+ // works with the 'LastIndexIsVector' code above.
+ if (isa<Constant>(I.getOperand()) &&
+ cast<Constant>(I.getOperand())->isNullValue()) {
+ Out << "))"; // avoid "+0".
+ } else {
+ Out << ")+(";
+ writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
+ Out << "))";
+ }
+ }
+ }
+ Out << ")";
+}
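+// Illustrative only (hypothetical names): "getelementptr %struct.S* %p,
+// i32 0, i32 1" prints as "(&llvm_cbe_p->field1)", while a non-constant
+// first index falls back to "(&llvm_cbe_p[llvm_cbe_i].field1)"; the vector
+// special case produces the (((float*)&...)+j) form described above.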
+
+void CWriter::writeMemoryAccess(Value *Operand, const Type *OperandType,
+ bool IsVolatile, unsigned Alignment) {
+
+ bool IsUnaligned = Alignment &&
+ Alignment < TD->getABITypeAlignment(OperandType);
+
+ if (!IsUnaligned)
+ Out << '*';
+ if (IsVolatile || IsUnaligned) {
+ Out << "((";
+ if (IsUnaligned)
+ Out << "struct __attribute__ ((packed, aligned(" << Alignment << "))) {";
+ printType(Out, OperandType, false, IsUnaligned ? "data" : "volatile*");
+ if (IsUnaligned) {
+ Out << "; } ";
+ if (IsVolatile) Out << "volatile ";
+ Out << "*";
+ }
+ Out << ")";
+ }
+
+ writeOperand(Operand);
+
+ if (IsVolatile || IsUnaligned) {
+ Out << ')';
+ if (IsUnaligned)
+ Out << "->data";
+ }
+}
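+// Illustrative only (hypothetical names): a naturally aligned i32 access
+// prints as "*llvm_cbe_p", a volatile one as
+// "*((unsigned int volatile*)llvm_cbe_p)", and an under-aligned one goes
+// through an ad-hoc packed struct, roughly
+//   ((struct __attribute__ ((packed, aligned(1))) {unsigned int data;} *)
+//       llvm_cbe_p)->data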
+
+void CWriter::visitLoadInst(LoadInst &I) {
+ writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(),
+ I.getAlignment());
+
+}
+
+void CWriter::visitStoreInst(StoreInst &I) {
+ writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(),
+ I.isVolatile(), I.getAlignment());
+ Out << " = ";
+ Value *Operand = I.getOperand(0);
+ Constant *BitMask = 0;
+ if (const IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
+ if (!ITy->isPowerOf2ByteWidth())
+ // We have a bit width that doesn't match an even power-of-2 byte
+ // size. Consequently we must & the value with the type's bit mask
+ BitMask = ConstantInt::get(ITy, ITy->getBitMask());
+ if (BitMask)
+ Out << "((";
+ writeOperand(Operand);
+ if (BitMask) {
+ Out << ") & ";
+ printConstant(BitMask, false);
+ Out << ")";
+ }
+}
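+// Illustrative only (hypothetical names): storing an integer whose width is
+// not a whole power-of-2 number of bytes (say i17) masks off the padding
+// bits, producing roughly
+//   *llvm_cbe_p = ((llvm_cbe_v) & 131071u);
+// where 131071 is the 17-bit mask computed from getBitMask() above.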
+
+void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) {
+ printGEPExpression(I.getPointerOperand(), gep_type_begin(I),
+ gep_type_end(I), false);
+}
+
+void CWriter::visitVAArgInst(VAArgInst &I) {
+ Out << "va_arg(*(va_list*)";
+ writeOperand(I.getOperand(0));
+ Out << ", ";
+ printType(Out, I.getType());
+ Out << ");\n ";
+}
+
+void CWriter::visitInsertElementInst(InsertElementInst &I) {
+ const Type *EltTy = I.getType()->getElementType();
+ writeOperand(I.getOperand(0));
+ Out << ";\n ";
+ Out << "((";
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(&I) << "))[";
+ writeOperand(I.getOperand(2));
+ Out << "] = (";
+ writeOperand(I.getOperand(1));
+ Out << ")";
+}
+
+void CWriter::visitExtractElementInst(ExtractElementInst &I) {
+ // We know that our operand is not inlined.
+ Out << "((";
+ const Type *EltTy =
+ cast<VectorType>(I.getOperand(0)->getType())->getElementType();
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";
+ writeOperand(I.getOperand(1));
+ Out << "]";
+}
+
+void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ Out << "(";
+ printType(Out, SVI.getType());
+ Out << "){ ";
+ const VectorType *VT = SVI.getType();
+ unsigned NumElts = VT->getNumElements();
+ const Type *EltTy = VT->getElementType();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (i) Out << ", ";
+ int SrcVal = SVI.getMaskValue(i);
+ if ((unsigned)SrcVal >= NumElts*2) {
+ Out << " 0/*undef*/ ";
+ } else {
+ Value *Op = SVI.getOperand((unsigned)SrcVal >= NumElts);
+ if (isa<Instruction>(Op)) {
+ // Do an extractelement of this value from the appropriate input.
+ Out << "((";
+ printType(Out, PointerType::getUnqual(EltTy));
+ Out << ")(&" << GetValueName(Op)
+ << "))[" << (SrcVal & (NumElts-1)) << "]";
+ } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) {
+ Out << "0";
+ } else {
+ printConstant(cast<ConstantVector>(Op)->getOperand(SrcVal &
+ (NumElts-1)),
+ false);
+ }
+ }
+ }
+ Out << "}";
+}
+
+void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
+ // Start by copying the entire aggregate value into the result variable.
+ writeOperand(IVI.getOperand(0));
+ Out << ";\n ";
+
+ // Then do the insert to update the field.
+ Out << GetValueName(&IVI);
+ for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
+ i != e; ++i) {
+ const Type *IndexedTy =
+ ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), b, i+1);
+ if (isa<ArrayType>(IndexedTy))
+ Out << ".array[" << *i << "]";
+ else
+ Out << ".field" << *i;
+ }
+ Out << " = ";
+ writeOperand(IVI.getOperand(1));
+}
+
+void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
+ Out << "(";
+ if (isa<UndefValue>(EVI.getOperand(0))) {
+ Out << "(";
+ printType(Out, EVI.getType());
+ Out << ") 0/*UNDEF*/";
+ } else {
+ Out << GetValueName(EVI.getOperand(0));
+ for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
+ i != e; ++i) {
+ const Type *IndexedTy =
+ ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), b, i+1);
+ if (isa<ArrayType>(IndexedTy))
+ Out << ".array[" << *i << "]";
+ else
+ Out << ".field" << *i;
+ }
+ }
+ Out << ")";
+}
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
+ raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel) {
+ if (FileType != TargetMachine::AssemblyFile) return true;
+
+ PM.add(createGCLoweringPass());
+ PM.add(createLowerAllocationsPass(true));
+ PM.add(createLowerInvokePass());
+ PM.add(createCFGSimplificationPass()); // clean up after lower invoke.
+ PM.add(new CBackendNameAllUsedStructsAndMergeFunctions());
+ PM.add(new CWriter(o));
+ PM.add(createGCInfoDeleter());
+ return false;
+}
diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt
new file mode 100644
index 000000000000..be243366d50e
--- /dev/null
+++ b/lib/Target/CBackend/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_target(CBackend
+ CBackend.cpp
+ )
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
new file mode 100644
index 000000000000..8b262455ad34
--- /dev/null
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -0,0 +1,43 @@
+//===-- CTargetMachine.h - TargetMachine for the C backend ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetMachine that is used by the C backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CTARGETMACHINE_H
+#define CTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+struct CTargetMachine : public TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+
+ CTargetMachine(const Module &M, const std::string &FS)
+ : DataLayout(&M) {}
+
+ virtual bool WantsWholeFile() const { return true; }
+ virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel);
+
+ // This class always works, but must be requested explicitly on
+ // llc command line.
+ static unsigned getModuleMatchQuality(const Module &M) { return 0; }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+};
+
+} // End llvm namespace
+
+
+#endif
diff --git a/lib/Target/CBackend/Makefile b/lib/Target/CBackend/Makefile
new file mode 100644
index 000000000000..336de0c6f440
--- /dev/null
+++ b/lib/Target/CBackend/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Target/CBackend/Makefile ------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMCBackend
+include $(LEVEL)/Makefile.common
+
+CompileCommonOpts += -Wno-format
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
new file mode 100644
index 000000000000..1cf0a91078fa
--- /dev/null
+++ b/lib/Target/CMakeLists.txt
@@ -0,0 +1,17 @@
+add_llvm_library(LLVMTarget
+ DarwinTargetAsmInfo.cpp
+ ELFTargetAsmInfo.cpp
+ SubtargetFeature.cpp
+ Target.cpp
+ TargetAsmInfo.cpp
+ TargetData.cpp
+ TargetFrameInfo.cpp
+ TargetInstrInfo.cpp
+ TargetMachOWriterInfo.cpp
+ TargetMachine.cpp
+ TargetMachineRegistry.cpp
+ TargetRegisterInfo.cpp
+ TargetSubtarget.cpp
+ )
+
+# TODO: Support other targets besides X86. See Makefile.
\ No newline at end of file
diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..4336b057a346
--- /dev/null
+++ b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,12 @@
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_partially_linked_object(LLVMCellSPUAsmPrinter
+ SPUAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMCellSPUCodeGen n)
+
+add_dependencies(LLVMCellSPUAsmPrinter ${n})
diff --git a/lib/Target/CellSPU/AsmPrinter/Makefile b/lib/Target/CellSPU/AsmPrinter/Makefile
new file mode 100644
index 000000000000..dd56df71a5de
--- /dev/null
+++ b/lib/Target/CellSPU/AsmPrinter/Makefile
@@ -0,0 +1,17 @@
+##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMCellSPUAsmPrinter
+
+# Hack: we need to include 'main' CellSPU target directory to grab
+# private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
new file mode 100644
index 000000000000..da1bf074de96
--- /dev/null
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -0,0 +1,623 @@
+//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Cell SPU assembly language. This printer
+// is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include <set>
+using namespace llvm;
+
+namespace {
+ STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+ const std::string bss_section(".bss");
+
+ class VISIBILITY_HIDDEN SPUAsmPrinter : public AsmPrinter {
+ std::set<std::string> FnStubs, GVStubs;
+ public:
+ explicit SPUAsmPrinter(raw_ostream &O, TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V) :
+ AsmPrinter(O, TM, T, OL, V) {}
+
+ virtual const char *getPassName() const {
+ return "STI CBEA SPU Assembly Printer";
+ }
+
+ SPUTargetMachine &getTM() {
+ return static_cast<SPUTargetMachine&>(TM);
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO);
+
+ /// printRegister - Print register according to target requirements.
+ ///
+ void printRegister(const MachineOperand &MO, bool R0AsZero) {
+ unsigned RegNo = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(RegNo) &&
+ "Not physreg??");
+ O << TM.getRegisterInfo()->get(RegNo).AsmName;
+ }
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isReg()) {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg())&&"Not physreg??");
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ } else if (MO.isImm()) {
+ O << MO.getImm();
+ } else {
+ printOp(MO);
+ }
+ }
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+
+ void
+ printS7ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ int value = MI->getOperand(OpNo).getImm();
+ value = (value << (32 - 7)) >> (32 - 7);
+
+ assert((value >= -(1 << 8) && value <= (1 << 7) - 1)
+ && "Invalid s7 argument");
+ O << value;
+ }
+
+ void
+ printU7ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ unsigned int value = MI->getOperand(OpNo).getImm();
+ assert(value < (1 << 8) && "Invalid u7 argument");
+ O << value;
+ }
+
+ void
+ printShufAddr(const MachineInstr *MI, unsigned OpNo)
+ {
+ char value = MI->getOperand(OpNo).getImm();
+ O << (int) value;
+ O << "(";
+ printOperand(MI, OpNo+1);
+ O << ")";
+ }
+
+ void
+ printS16ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ O << (short) MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printU16ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printU32ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ O << (unsigned)MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printMemRegReg(const MachineInstr *MI, unsigned OpNo) {
+      // Print a register-plus-register memory operand: the base register by
+      // name, followed by ", " and the second operand. Both parts are printed
+      // as-is; no register is special-cased here.
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ O << ", ";
+ printOperand(MI, OpNo+1);
+ }
+
+ void
+ printU18ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ unsigned int value = MI->getOperand(OpNo).getImm();
+ assert(value <= (1 << 19) - 1 && "Invalid u18 argument");
+ O << value;
+ }
+
+ void
+ printS10ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+ >> 16);
+ assert((value >= -(1 << 9) && value <= (1 << 9) - 1)
+ && "Invalid s10 argument");
+ O << value;
+ }
+
+ void
+ printU10ImmOperand(const MachineInstr *MI, unsigned OpNo)
+ {
+ short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+ >> 16);
+ assert((value <= (1 << 10) - 1) && "Invalid u10 argument");
+ O << value;
+ }
+
+ void
+ printDFormAddr(const MachineInstr *MI, unsigned OpNo)
+ {
+ assert(MI->getOperand(OpNo).isImm() &&
+ "printDFormAddr first operand is not immediate");
+ int64_t value = int64_t(MI->getOperand(OpNo).getImm());
+ int16_t value16 = int16_t(value);
+ assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
+ && "Invalid dform s10 offset argument");
+ O << (value16 & ~0xf) << "(";
+ printOperand(MI, OpNo+1);
+ O << ")";
+ }
+
+ void
+ printAddr256K(const MachineInstr *MI, unsigned OpNo)
+ {
+ /* Note: operand 1 is an offset or symbol name. */
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo);
+ } else {
+ printOp(MI->getOperand(OpNo));
+ if (MI->getOperand(OpNo+1).isImm()) {
+ int displ = int(MI->getOperand(OpNo+1).getImm());
+ if (displ > 0)
+ O << "+" << displ;
+ else if (displ < 0)
+ O << displ;
+ }
+ }
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo) {
+ printOp(MI->getOperand(OpNo));
+ }
+
+ void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo) {
+ // Used to generate a ".-<target>", but it turns out that the assembler
+ // really wants the target.
+ //
+ // N.B.: This operand is used for call targets. Branch hints are another
+ // animal entirely.
+ printOp(MI->getOperand(OpNo));
+ }
+
+ void printHBROperand(const MachineInstr *MI, unsigned OpNo) {
+ // HBR operands are generated in front of branches, hence, the
+ // program counter plus the target.
+ O << ".+";
+ printOp(MI->getOperand(OpNo));
+ }
+
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo);
+ } else {
+ printOp(MI->getOperand(OpNo));
+ O << "@h";
+ }
+ }
+
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo);
+ } else {
+ printOp(MI->getOperand(OpNo));
+ O << "@l";
+ }
+ }
+
+ /// Print local store address
+ void printSymbolLSA(const MachineInstr *MI, unsigned OpNo) {
+ printOp(MI->getOperand(OpNo));
+ }
+
+ void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ int value = (int) MI->getOperand(OpNo).getImm();
+ assert((value >= 0 && value < 16)
+ && "Invalid negated immediate rotate 7-bit argument");
+ O << -value;
+ } else {
+ assert(0 &&"Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+ }
+ }
+
+ void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ int value = (int) MI->getOperand(OpNo).getImm();
+ assert((value >= 0 && value <= 32)
+ && "Invalid negated immediate rotate 7-bit argument");
+ O << -value;
+ } else {
+ assert(0 &&"Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+ }
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &F) = 0;
+ //! Assembly printer cleanup after function has been emitted
+ virtual bool doFinalization(Module &M) = 0;
+ };
+
+ /// LinuxAsmPrinter - SPU assembly printer, customized for Linux
+ class VISIBILITY_HIDDEN LinuxAsmPrinter : public SPUAsmPrinter {
+ DwarfWriter *DW;
+ MachineModuleInfo *MMI;
+ public:
+ explicit LinuxAsmPrinter(raw_ostream &O, SPUTargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level F,
+ bool V)
+ : SPUAsmPrinter(O, TM, T, F, V), DW(0), MMI(0) {}
+
+ virtual const char *getPassName() const {
+ return "STI CBEA SPU Assembly Printer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ //! Dump globals, perform cleanup after function emission
+ bool doFinalization(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ SPUAsmPrinter::getAnalysisUsage(AU);
+ }
+
+ //! Emit a global variable according to its section and type
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ };
+} // end of anonymous namespace
+
+// Include the auto-generated portion of the assembly writer
+#include "SPUGenAsmWriter.inc"
+
+void SPUAsmPrinter::printOp(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ cerr << "printOp() does not handle immediate values\n";
+ abort();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() != Reloc::Static) {
+ std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName();
+ GVStubs.insert(Name);
+ O << "L" << Name << "$non_lazy_ptr";
+ return;
+ }
+ O << TAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ // Computing the address of a global symbol, not calling it.
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+
+ // External or weakly linked global variables need non-lazily-resolved
+ // stubs
+ if (TM.getRelocationModel() != Reloc::Static) {
+ if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
+ GVStubs.insert(Name);
+ O << "L" << Name << "$non_lazy_ptr";
+ return;
+ }
+ }
+ O << Name;
+
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ return;
+ }
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ printMemRegReg(MI, OpNo);
+ return false;
+}
+
+/// printMachineInstruction -- Print out a single Cell SPU MI to the current
+/// output stream.
+///
+void SPUAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+ printInstruction(MI);
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool
+LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF)
+{
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+
+ SwitchToSection(TAI->SectionForGlobal(F));
+ EmitAlignment(3, F);
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::PrivateLinkage:
+ case Function::InternalLinkage: // Symbols default to internal.
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.global\t" << CurrentFnName << "\n"
+ << "\t.type\t" << CurrentFnName << ", @function\n";
+ break;
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ O << "\t.global\t" << CurrentFnName << "\n";
+ O << "\t.weak_definition\t" << CurrentFnName << "\n";
+ break;
+ }
+ O << CurrentFnName << ":\n";
+
+ // Emit pre-function debug information.
+ DW->BeginFunction(&MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ O << "\t.size\t" << CurrentFnName << ",.-" << CurrentFnName << "\n";
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // Emit post-function debug information.
+ DW->EndFunction(&MF);
+
+ // We didn't modify anything.
+ return false;
+}
+
+
+bool LinuxAsmPrinter::doInitialization(Module &M) {
+ bool Result = AsmPrinter::doInitialization(M);
+ SwitchToTextSection("\t.text");
+ // Emit initial debug information.
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ assert(DW && "Dwarf Writer is not available");
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ DW->BeginModule(&M, MMI, O, this, TAI);
+ return Result;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \\n or \\0.
+static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+ for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+ Name != E; ++Name)
+ if (isprint(*Name))
+ OS << *Name;
+}
+
+/*!
+ Emit a global variable according to its section, alignment, etc.
+
+ \note This code was shamelessly copied from the PowerPC's assembly printer,
+ which sort of screams for some kind of refactorization of common code.
+ */
+void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+ return;
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ std::string name = Mang->getValueName(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ Constant *C = GVar->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && /* FIXME: Verify correct */
+ !GVar->hasSection() &&
+ (GVar->hasLocalLinkage() || GVar->hasExternalLinkage() ||
+ GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasExternalLinkage()) {
+ O << "\t.global " << name << '\n';
+ O << "\t.type " << name << ", @object\n";
+ O << name << ":\n";
+ O << "\t.zero " << Size << '\n';
+ } else if (GVar->hasLocalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << ',' << Size;
+ } else {
+ O << ".comm " << name << ',' << Size;
+ }
+ O << "\t\t" << TAI->getCommentString() << " '";
+ PrintUnmangledNameSafely(GVar, O);
+ O << "'\n";
+ return;
+ }
+
+ switch (GVar->getLinkage()) {
+ // Should never be seen for the CellSPU platform...
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::CommonLinkage:
+ O << "\t.global " << name << '\n'
+ << "\t.type " << name << ", @object\n"
+ << "\t.weak " << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.global " << name << '\n'
+ << "\t.type " << name << ", @object\n";
+ // FALL THROUGH
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ default:
+ cerr << "Unknown linkage type!";
+ abort();
+ }
+
+ EmitAlignment(Align, GVar);
+ O << name << ":\t\t\t\t" << TAI->getCommentString() << " '";
+ PrintUnmangledNameSafely(GVar, O);
+ O << "'\n";
+
+ // If the initializer is an extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+}
+
+bool LinuxAsmPrinter::doFinalization(Module &M) {
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+ // Emit final debug information.
+ DW->EndModule();
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// createSPUAsmPrinterPass - Returns a pass that prints the Cell SPU
+/// assembly code for a MachineFunction to the given output stream, in a format
+/// that the Linux SPU assembler can deal with.
+///
+FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o,
+ SPUTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
new file mode 100644
index 000000000000..e3e12acc505d
--- /dev/null
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(LLVM_TARGET_DEFINITIONS SPU.td)
+
+tablegen(SPUGenInstrNames.inc -gen-instr-enums)
+tablegen(SPUGenRegisterNames.inc -gen-register-enums)
+tablegen(SPUGenAsmWriter.inc -gen-asm-writer)
+tablegen(SPUGenCodeEmitter.inc -gen-emitter)
+tablegen(SPUGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(SPUGenRegisterInfo.inc -gen-register-desc)
+tablegen(SPUGenInstrInfo.inc -gen-instr-desc)
+tablegen(SPUGenDAGISel.inc -gen-dag-isel)
+tablegen(SPUGenSubtarget.inc -gen-subtarget)
+tablegen(SPUGenCallingConv.inc -gen-callingconv)
+
+add_llvm_target(CellSPUCodeGen
+ SPUFrameInfo.cpp
+ SPUHazardRecognizers.cpp
+ SPUInstrInfo.cpp
+ SPUISelDAGToDAG.cpp
+ SPUISelLowering.cpp
+ SPURegisterInfo.cpp
+ SPUSubtarget.cpp
+ SPUTargetAsmInfo.cpp
+ SPUTargetMachine.cpp
+ )
diff --git a/lib/Target/CellSPU/CellSDKIntrinsics.td b/lib/Target/CellSPU/CellSDKIntrinsics.td
new file mode 100644
index 000000000000..5d759a41c2c0
--- /dev/null
+++ b/lib/Target/CellSPU/CellSDKIntrinsics.td
@@ -0,0 +1,448 @@
+//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+///--==-- Arithmetic ops intrinsics --==--
+def CellSDKah:
+ RR_Int_v8i16<0b00010011000, "ah", IntegerOp, int_spu_si_ah>;
+def CellSDKahi:
+ RI10_Int_v8i16<0b00010011000, "ahi", IntegerOp, int_spu_si_ahi>;
+def CellSDKa:
+ RR_Int_v4i32<0b00000011000, "a", IntegerOp, int_spu_si_a>;
+def CellSDKai:
+ RI10_Int_v4i32<0b00111000, "ai", IntegerOp, int_spu_si_ai>;
+def CellSDKsfh:
+ RR_Int_v8i16<0b00010010000, "sfh", IntegerOp, int_spu_si_sfh>;
+def CellSDKsfhi:
+ RI10_Int_v8i16<0b10110000, "sfhi", IntegerOp, int_spu_si_sfhi>;
+def CellSDKsf:
+ RR_Int_v4i32<0b00000010000, "sf", IntegerOp, int_spu_si_sf>;
+def CellSDKsfi:
+ RI10_Int_v4i32<0b00110000, "sfi", IntegerOp, int_spu_si_sfi>;
+def CellSDKaddx:
+ RR_Int_v4i32<0b00000010110, "addx", IntegerOp, int_spu_si_addx>;
+def CellSDKcg:
+ RR_Int_v4i32<0b0100001100, "cg", IntegerOp, int_spu_si_cg>;
+def CellSDKcgx:
+ RR_Int_v4i32<0b01000010110, "cgx", IntegerOp, int_spu_si_cgx>;
+def CellSDKsfx:
+ RR_Int_v4i32<0b10000010110, "sfx", IntegerOp, int_spu_si_sfx>;
+def CellSDKbg:
+ RR_Int_v4i32<0b01000010000, "bg", IntegerOp, int_spu_si_bg>;
+def CellSDKbgx:
+ RR_Int_v4i32<0b11000010110, "bgx", IntegerOp, int_spu_si_bgx>;
+
+def CellSDKmpy:
+ RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpy $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpy (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyu:
+ RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyu $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyu (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))] >;
+
+def CellSDKmpyi:
+ RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "mpyi $rT, $rA, $val", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyi (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))]>;
+
+def CellSDKmpyui:
+ RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "mpyui $rT, $rA, $val", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyui (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))]>;
+
+def CellSDKmpya:
+ RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "mpya $rT, $rA, $rB, $rC", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpya (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB),
+ (v8i16 VECREG:$rC)))]>;
+
+def CellSDKmpyh:
+ RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyh $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyh (v4i32 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpys:
+ RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpys $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpys (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhh:
+ RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhh $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhh (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhha:
+ RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhha $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhha (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+// Not sure how to match a (set $rT, (add $rT (mpyhh $rA, $rB)))... so leave
+// this as an intrinsic for the time being.
+def CellSDKmpyhhu:
+ RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhhu $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhu (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhhau:
+ RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhhau $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhau (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKand:
+ RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "and\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKandc:
+ RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "andc\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKandbi:
+ RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "andbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKandhi:
+ RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "andhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_andhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKandi:
+ RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "andi\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_andi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "or\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_or (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKorc:
+ RRForm<0b10010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "orc\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKorbi:
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "orbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKorhi:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "orhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_orhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKori:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ori\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKxor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "xor\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKxorbi:
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "xorbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKxorhi:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "xorhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_xorhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKxori:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "xori\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_xori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKnor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "nor\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_nor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKnand:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "nand\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_nand (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+//===----------------------------------------------------------------------===//
+// Shift/rotate intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKshli:
+ Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val),
+ (SHLIv4i32 VECREG:$rA, uimm7:$val)>;
+
+def CellSDKshlqbi:
+ Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB),
+ (SHLQBIv16i8 VECREG:$rA, R32C:$rB)>;
+
+def CellSDKshlqii:
+ Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val),
+ (SHLQBIIv16i8 VECREG:$rA, uimm7:$val)>;
+
+def CellSDKshlqby:
+ Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB),
+ (SHLQBYv16i8 VECREG:$rA, R32C:$rB)>;
+
+def CellSDKshlqbyi:
+ Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val),
+ (SHLQBYIv16i8 VECREG:$rA, uimm7:$val)>;
+
+//===----------------------------------------------------------------------===//
+// Branch/compare intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKceq:
+ RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceq\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ceq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKceqi:
+ RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ceqi\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ceqi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKceqb:
+ RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceqb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKceqbi:
+ RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "ceqbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKceqh:
+ RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceqh\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_ceqh (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKceqhi:
+ RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ceqhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_ceqhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+def CellSDKcgth:
+ RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgth\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_cgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKcgthi:
+ RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "cgthi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_cgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKcgt:
+ RRForm<0b00000010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgt\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_cgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKcgti:
+ RI10Form<0b00110010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "cgti\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_cgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKcgtb:
+ RRForm<0b00001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgtb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKcgtbi:
+ RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "cgtbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKclgth:
+ RRForm<0b00010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgth\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_clgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKclgthi:
+ RI10Form<0b10111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "clgthi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_clgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKclgt:
+ RRForm<0b00000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgt\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_clgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKclgti:
+ RI10Form<0b00111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "clgti\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_clgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKclgtb:
+ RRForm<0b00001011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgtb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKclgtbi:
+ RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "clgtbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKfa:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fa\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fa (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfs:
+ RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fs\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fs (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfm:
+ RRForm<0b01100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fm\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fm (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfceq:
+ RRForm<0b01000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fceq\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fceq (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcgt:
+ RRForm<0b01000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcgt\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcgt (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcmeq:
+ RRForm<0b01010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcmeq\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcmeq (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcmgt:
+ RRForm<0b01010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcmgt\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcmgt (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfma:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fma (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+def CellSDKfnms:
+ RRRForm<0b1011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fnms (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+def CellSDKfms:
+ RRRForm<0b1111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fms (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+//===----------------------------------------------------------------------===//
+// Double precision floating-point intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKdfa:
+ RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfa\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfa (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfs:
+ RRForm<0b10110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfs\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfs (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfm:
+ RRForm<0b01110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfm\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfm (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfma:
+ RRForm<0b00111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfma\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfma (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfnma:
+ RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfnma\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfnma (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfnms:
+ RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfnms\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfnms (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfms:
+ RRForm<0b10111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfms\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfms (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile
new file mode 100644
index 000000000000..a460db3cfeda
--- /dev/null
+++ b/lib/Target/CellSPU/Makefile
@@ -0,0 +1,22 @@
+##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMCellSPUCodeGen
+TARGET = SPU
+
+BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \
+ SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \
+ SPUGenRegisterInfo.h.inc SPUGenRegisterInfo.inc \
+ SPUGenInstrInfo.inc SPUGenDAGISel.inc \
+ SPUGenSubtarget.inc SPUGenCallingConv.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt
new file mode 100644
index 000000000000..4783dd5d24eb
--- /dev/null
+++ b/lib/Target/CellSPU/README.txt
@@ -0,0 +1,90 @@
+//===- README.txt - Notes for improving CellSPU-specific code gen ---------===//
+
+This code was contributed by a team from the Computer Systems Research
+Department in The Aerospace Corporation:
+
+- Scott Michel (head bottle washer and much of the non-floating point
+ instructions)
+- Mark Thomas (floating point instructions)
+- Michael AuYeung (intrinsics)
+- Chandler Carruth (LLVM expertise)
+- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise)
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR
+OTHERWISE. IN NO EVENT SHALL THE AEROSPACE CORPORATION BE LIABLE FOR DAMAGES
+OF ANY KIND OR NATURE WHETHER BASED IN CONTRACT, TORT, OR OTHERWISE ARISING
+OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE INCLUDING, WITHOUT
+LIMITATION, DAMAGES RESULTING FROM LOST OR CONTAMINATED DATA, LOST PROFITS OR
+REVENUE, COMPUTER MALFUNCTION, OR FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL,
+OR PUNITIVE DAMAGES, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES OR
+SUCH DAMAGES ARE FORESEEABLE.
+
+---------------------------------------------------------------------------
+--WARNING--:
+--WARNING--: The CellSPU work is work-in-progress and "alpha" quality code.
+--WARNING--:
+
+If you are brave enough to try this code or help to hack on it, be sure
+to add 'spu' to configure's --enable-targets option, e.g.:
+
+ ./configure <your_configure_flags_here> \
+ --enable-targets=x86,x86_64,powerpc,spu
+
+---------------------------------------------------------------------------
+
+TODO:
+* Create a machine pass for performing dual-pipeline scheduling specifically
+ for CellSPU, and insert branch prediction instructions as needed.
+
+* i32 instructions:
+
+ * i32 division (work-in-progress)
+
+* i64 support (see i64operations.c test harness):
+
+ * shifts and comparison operators: done
+ * sign and zero extension: done
+ * addition: done
+ * subtraction: needed
+ * multiplication: done
+
+* i128 support:
+
+ * zero extension, any extension: done
+ * sign extension: needed
+ * arithmetic operators (add, sub, mul, div): needed
+ * logical operations (and, or, shl, srl, sra, xor, nor, nand): needed
+
+ * or: done
+
+* f64 support
+
+ * Comparison operators:
+ SETOEQ unimplemented
+ SETOGT unimplemented
+ SETOGE unimplemented
+ SETOLT unimplemented
+ SETOLE unimplemented
+ SETONE unimplemented
+ SETO done (lowered)
+ SETUO done (lowered)
+ SETUEQ unimplemented
+ SETUGT unimplemented
+ SETUGE unimplemented
+ SETULT unimplemented
+ SETULE unimplemented
+ SETUNE unimplemented
+
+* LLVM vector support
+
+ * VSETCC needs to be implemented. It's pretty straightforward to code but
+ has not been done yet.
+
+* Intrinsics
+
+ * spu.h intrinsics added but not tested. Need to have an operational
+ llvm-spu-gcc in order to write a unit test harness.
+
+===-------------------------------------------------------------------------===
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
new file mode 100644
index 000000000000..77a062e0e2e3
--- /dev/null
+++ b/lib/Target/CellSPU/SPU.h
@@ -0,0 +1,102 @@
+//===-- SPU.h - Top-level interface for Cell SPU Target ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Cell SPU back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IBMCELLSPU_H
+#define LLVM_TARGET_IBMCELLSPU_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SPUTargetMachine;
+ class FunctionPass;
+ class raw_ostream;
+
+ FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
+ FunctionPass *createSPUAsmPrinterPass(raw_ostream &o,
+ SPUTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+
+ /*--== Utility functions/predicates/etc used all over the place: --==*/
+ //! Predicate test for a signed 10-bit value
+ /*!
+ \param Value The input value to be tested
+
+ This predicate returns true if the value can be represented as a signed
+ 10-bit immediate.
+ */
+ template<typename T>
+ inline bool isS10Constant(T Value);
+
+ template<>
+ inline bool isS10Constant<short>(short Value) {
+ int SExtValue = ((int) Value << (32 - 10)) >> (32 - 10);
+ return ((Value >= 0 && Value <= (1 << 9) - 1)
+ || (Value < 0 && (short) SExtValue == Value));
+ }
+
+ template<>
+ inline bool isS10Constant<int>(int Value) {
+ return (Value >= -(1 << 9) && Value <= (1 << 9) - 1);
+ }
+
+ template<>
+ inline bool isS10Constant<uint32_t>(uint32_t Value) {
+ return (Value <= ((1 << 9) - 1));
+ }
+
+ template<>
+ inline bool isS10Constant<int64_t>(int64_t Value) {
+ return (Value >= -(1 << 9) && Value <= (1 << 9) - 1);
+ }
+
+ template<>
+ inline bool isS10Constant<uint64_t>(uint64_t Value) {
+ return (Value <= ((1 << 9) - 1));
+ }
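+
+ // Usage examples (illustrative only, not part of the original sources):
+ //   isS10Constant<int>(511);   // true  -- largest positive S10 value
+ //   isS10Constant<int>(-512);  // true  -- smallest negative S10 value
+ //   isS10Constant<int>(512);   // false -- needs more than 10 bits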
+
+ //! Predicate test for an unsigned 10-bit value
+ /*!
+ \param Value The input value to be tested
+
+ This predicate returns true if the value can be represented as an unsigned
+ 10-bit immediate.
+ */
+ inline bool isU10Constant(short Value) {
+ return (Value == (Value & 0x3ff));
+ }
+
+ inline bool isU10Constant(int Value) {
+ return (Value == (Value & 0x3ff));
+ }
+
+ inline bool isU10Constant(uint32_t Value) {
+ return (Value == (Value & 0x3ff));
+ }
+
+ inline bool isU10Constant(int64_t Value) {
+ return (Value == (Value & 0x3ff));
+ }
+
+ inline bool isU10Constant(uint64_t Value) {
+ return (Value == (Value & 0x3ff));
+ }
+}
+
+// Defines symbolic names for the SPU instructions.
+//
+#include "SPUGenInstrNames.inc"
+
+#endif /* LLVM_TARGET_IBMCELLSPU_H */
diff --git a/lib/Target/CellSPU/SPU.td b/lib/Target/CellSPU/SPU.td
new file mode 100644
index 000000000000..8327fe03d7f8
--- /dev/null
+++ b/lib/Target/CellSPU/SPU.td
@@ -0,0 +1,66 @@
+//===- SPU.td - Describe the STI Cell SPU Target Machine ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the STI Cell SPU target machine.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+// Holder of code fragments (you'd think this'd already be in
+// a td file somewhere... :-)
+
+class CodeFrag<dag frag> {
+ dag Fragment = frag;
+}
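+
+// For example (illustrative only; the name below is hypothetical), a fragment
+// can be defined once:
+//
+//   def SomeCompare : CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+//
+// and its dag then spliced into several patterns via SomeCompare.Fragment,
+// which is how SPU64InstrInfo.td builds the 64-bit compare sequences.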
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "SPURegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction formats, instructions
+//===----------------------------------------------------------------------===//
+
+include "SPUNodes.td"
+include "SPUOperands.td"
+include "SPUSchedule.td"
+include "SPUInstrFormats.td"
+include "SPUInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget features:
+//===----------------------------------------------------------------------===//
+
+def DefaultProc: SubtargetFeature<"", "ProcDirective", "SPU::DEFAULT_PROC", "">;
+def LargeMemFeature:
+ SubtargetFeature<"large_mem","UseLargeMem", "true",
+ "Use large (>256) LSA memory addressing [default = false]">;
+
+def SPURev0 : Processor<"v0", SPUItineraries, [DefaultProc]>;
+
+//===----------------------------------------------------------------------===//
+// Calling convention:
+//===----------------------------------------------------------------------===//
+
+include "SPUCallingConv.td"
+
+// Target:
+
+def SPUInstrInfo : InstrInfo {
+ let isLittleEndianEncoding = 1;
+}
+
+def SPU : Target {
+ let InstructionSet = SPUInstrInfo;
+}
diff --git a/lib/Target/CellSPU/SPU128InstrInfo.td b/lib/Target/CellSPU/SPU128InstrInfo.td
new file mode 100644
index 000000000000..3031fda54381
--- /dev/null
+++ b/lib/Target/CellSPU/SPU128InstrInfo.td
@@ -0,0 +1,41 @@
+//===--- SPU128InstrInfo.td - Cell SPU 128-bit operations -*- tablegen -*--===//
+//
+// Cell SPU 128-bit operations
+//
+//===----------------------------------------------------------------------===//
+
+// zext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (zext R32C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// zext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (zext R64C:$rSrc)),
+ (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// zext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (zext R16C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// zext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (zext R8C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xff), 12)>;
+
+// anyext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (anyext R32C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// anyext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (anyext R64C:$rSrc)),
+ (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// anyext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (anyext R16C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// anyext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (anyext R8C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
+
+// Shift left
+def : Pat<(shl GPRC:$rA, R32C:$rB),
+ (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>;
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
new file mode 100644
index 000000000000..06eb1496def7
--- /dev/null
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -0,0 +1,394 @@
+//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
+//
+// Cell SPU 64-bit operations
+//
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// 64-bit comparisons:
+//
+// 1. The instruction sequences for the vector and scalar cases differ only
+// by a constant. In the scalar case, we're only interested in the
+// top two 32-bit slots, whereas we're interested in an exact
+// all-four-slot match in the vector case.
+//
+// 2. There are no "immediate" forms, since loading a 64-bit constant
+// could require a constant pool load.
+//
+// 3. i64 setcc results are i32, which are subsequently converted to a FSM
+// mask when used in a select pattern.
+//
+// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
+// [Note: this may be moot, since gb produces v4i32 or r32.]
+//
+// 5. The code sequences for r64 and v2i64 are probably overly conservative,
+// compared to the code that gcc produces.
+//
+// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
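+
+// Rough sketch of the scheme above (illustrative only): an i64 seteq of
+// $rA, $rB is built from CEQr64compare below -- compare the operands word-wise
+// with CEQv4i32, gather the per-word results with GBv4i32, then test the
+// gathered mask with CGTIv4i32 so that only a match in the two high (scalar)
+// slots counts. The resulting i32 is later expanded into a select mask with
+// FSM when it feeds a select.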
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBr64_cond:
+ SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+// The generic i64 select pattern, which assumes that the comparison result
+// is in a 32-bit register that contains a select mask pattern (i.e., gather
+// bits result):
+
+def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
+
+// select the negative condition:
+class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
+ Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
+
+// setcc the negative condition:
+class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
+ Pat<(cond R64C:$rA, R64C:$rB),
+ (XORIr32 compare.Fragment, -1)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// The i64 seteq fragment that does the scalar->vector conversion and
+// comparison:
+def CEQr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB))), 0xb)>;
+
+// The i64 seteq fragment that does the vector comparison
+def CEQv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
+
+// i64 seteq (equality): the setcc result is i32, which is converted to a
+// vector FSM mask when used in a select pattern.
+//
+// v2i64 seteq (equality): the setcc result is v4i32
+multiclass CompareEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>;
+ def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>;
+ def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>;
+}
+
+defm I64EQ: CompareEqual64;
+
+def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
+def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
+
+// i64 setne:
+def : I64SETCCNegCond<setne, I64EQr64>;
+def : I64SELECTNegCond<setne, I64EQr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setugt/setule:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGTr64ugt:
+ CodeFrag<(CLGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+
+def CLGTr64eq:
+ CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+
+def CLGTr64compare:
+ CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
+ (XSWDv2i64 CLGTr64ugt.Fragment),
+ CLGTr64eq.Fragment)>;
+
+def CLGTv2i64ugt:
+ CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64eq:
+ CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64compare:
+ CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
+ (XSWDv2i64 CLGTv2i64ugt.Fragment),
+ CLGTv2i64eq.Fragment)>;
+
+multiclass CompareLogicalGreaterThan64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(ORi32_v4i32 CLGTr64compare.Fragment)>;
+ def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTr64compare.Fragment))>;
+ def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTv2i64compare.Fragment))>;
+}
+
+defm I64LGT: CompareLogicalGreaterThan64;
+
+def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
+def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+ I64LGTv2i64.Fragment>;
+
+// i64 setule:
+def : I64SETCCNegCond<setule, I64LGTr64>;
+def : I64SELECTNegCond<setule, I64LGTr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setuge/setult:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGEr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
+ CLGTr64eq.Fragment)), 0xb)>;
+
+def CLGEv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
+ CLGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareLogicalGreaterEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(ORi32_v4i32 CLGEr64compare.Fragment)>;
+ def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEr64compare.Fragment))>;
+ def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEv2i64compare.Fragment))>;
+}
+
+defm I64LGE: CompareLogicalGreaterEqual64;
+
+def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
+def : Pat<(setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+ I64LGEv2i64.Fragment>;
+
+// i64 setult:
+def : I64SETCCNegCond<setult, I64LGEr64>;
+def : I64SELECTNegCond<setult, I64LGEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setgt/setle:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGTr64sgt:
+ CodeFrag<(CGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+
+def CGTr64eq:
+ CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>;
+
+def CGTr64compare:
+ CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
+ (XSWDv2i64 CGTr64sgt.Fragment),
+ CGTr64eq.Fragment)>;
+
+def CGTv2i64sgt:
+ CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64eq:
+ CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64compare:
+ CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
+ (XSWDv2i64 CGTv2i64sgt.Fragment),
+ CGTv2i64eq.Fragment)>;
+
+multiclass CompareGreaterThan64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(ORi32_v4i32 CGTr64compare.Fragment)>;
+ def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTr64compare.Fragment))>;
+ def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTv2i64compare.Fragment))>;
+}
+
+defm I64GT: CompareGreaterThan64;
+
+def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
+def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+ I64GTv2i64.Fragment>;
+
+// i64 setle:
+def : I64SETCCNegCond<setle, I64GTr64>;
+def : I64SELECTNegCond<setle, I64GTr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setge/setlt:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGEr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
+ CGTr64eq.Fragment)), 0xb)>;
+
+def CGEv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
+ CGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareGreaterEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(ORi32_v4i32 CGEr64compare.Fragment)>;
+ def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEr64compare.Fragment))>;
+ def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEv2i64compare.Fragment))>;
+}
+
+defm I64GE: CompareGreaterEqual64;
+
+def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
+def : Pat<(setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+ I64GEv2i64.Fragment>;
+
+// i64 setlt:
+def : I64SETCCNegCond<setlt, I64GEr64>;
+def : I64SELECTNegCond<setlt, I64GEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 add
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_add_cg<dag lhs, dag rhs>:
+ CodeFrag<(CGv4i32 lhs, rhs)>;
+
+class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
+ CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
+
+class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
+ v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
+
+def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_add<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 subtraction
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;
+
+class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
+ CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
+
+def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB),
+ v2i64_sub_bg<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_sub<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ v2i64_sub_bg<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 multiply
+//
+// Note: i64 multiply is simply the vector->scalar conversion of the
+// full-on v2i64 multiply, since the entire vector has to be manipulated
+// anyway.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
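+
+// Illustrative note (not from the original sources): the SPU multiplier only
+// produces 16 x 16 -> 32 bit products (the MPY* forms used below), so an i64
+// or v2i64 multiply has to be assembled from 16-bit partial products. Writing
+//   a = a0*2^48 + a1*2^32 + a2*2^16 + a3     (and likewise for b),
+// the low 64 bits of a*b are the sum of the a_i*b_j terms whose weight stays
+// below 2^64, each shifted into place; the classes below select, shift and
+// sum those terms, using the add-with-carry helpers above for the additions.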
+
+class v2i64_mul_ahi64<dag rA> :
+ CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_bhi64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_alo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_blo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_ashlq2<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;
+
+class v2i64_mul_ashlq4<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;
+
+class v2i64_mul_bshlq2<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;
+
+class v2i64_mul_bshlq4<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
+
+class v2i64_highprod<dag rA, dag rB>:
+ CodeFrag<(Av4i32
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment, // a1 x b3
+ v2i64_mul_ahi64<rA>.Fragment),
+ (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment, // a0 x b3
+ v2i64_mul_bshlq4<rB>.Fragment)),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
+ v2i64_mul_ashlq4<rA>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment),
+ (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment))))))>;
+
+class v2i64_mul_a3_b3<dag rA, dag rB>:
+ CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_blo64<rB>.Fragment)>;
+
+class v2i64_mul_a2_b3<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_mul_a3_b2<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
+ v2i64_mul_ashlq2<rA>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
+ v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
+ v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
+
+class v2i64_mul<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
+ (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
+ (ILv4i32 0),
+ (FSMBIv4i32 0x0f0f)),
+ rCGmask>;
+
+def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA),
+ (ORv2i64_i64 R64C:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment)>;
+
+def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f64 comparisons
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for f64. Note that, unlike the i64 form above,
+// the selection mask here is a 32-bit register rather than a vector:
+def SELBf64_cond:
+ SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
+ [(set R64FP:$rT,
+ (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
new file mode 100644
index 000000000000..10dc837d90b7
--- /dev/null
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -0,0 +1,115 @@
+//===- SPUCallingConv.td - Calling Conventions for CellSPU ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the STI Cell SPU architecture.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<SPUSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for Cell SPU: Everything can be passed back via $3:
+def RetCC_SPU : CallingConv<[
+ CCIfType<[i8], CCAssignToReg<[R3]>>,
+ CCIfType<[i16], CCAssignToReg<[R3]>>,
+ CCIfType<[i32], CCAssignToReg<[R3]>>,
+ CCIfType<[i64], CCAssignToReg<[R3]>>,
+ CCIfType<[i128], CCAssignToReg<[R3]>>,
+ CCIfType<[f32, f64], CCAssignToReg<[R3]>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>,
+ CCIfType<[v2i32], CCAssignToReg<[R3]>>
+]>;
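+
+// For example (illustrative): an i8, an i64 and a v4f32 return value all come
+// back in (part of) register $3; there is no type-dependent register fan-out.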
+
+
+//===----------------------------------------------------------------------===//
+// CellSPU Argument Calling Conventions
+// (note: this isn't used yet, but presumably should be at some point, as it
+// is for other targets.)
+//===----------------------------------------------------------------------===//
+/*
+def CC_SPU : CallingConv<[
+ CCIfType<[i8], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+ CCIfType<[i16], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+ CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+ CCIfType<[i64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+ CCIfType<[f64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+ CCIfType<[v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>
+]>;
+*/
diff --git a/lib/Target/CellSPU/SPUFrameInfo.cpp b/lib/Target/CellSPU/SPUFrameInfo.cpp
new file mode 100644
index 000000000000..60d7ba736ac6
--- /dev/null
+++ b/lib/Target/CellSPU/SPUFrameInfo.cpp
@@ -0,0 +1,29 @@
+//===-- SPUFrameInfo.cpp - SPU Frame Information --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Frame information for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUFrameInfo.h"
+#include "SPURegisterNames.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// SPUFrameInfo:
+//===----------------------------------------------------------------------===//
+
+SPUFrameInfo::SPUFrameInfo(const TargetMachine &tm):
+ TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
+ TM(tm)
+{
+ LR[0].first = SPU::R0;
+ LR[0].second = 16;
+}
diff --git a/lib/Target/CellSPU/SPUFrameInfo.h b/lib/Target/CellSPU/SPUFrameInfo.h
new file mode 100644
index 000000000000..e8ca333f0b69
--- /dev/null
+++ b/lib/Target/CellSPU/SPUFrameInfo.h
@@ -0,0 +1,79 @@
+//===-- SPUFrameInfo.h - Top-level interface for Cell SPU Target -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains CellSPU frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(SPUFRAMEINFO_H)
+#define SPUFRAMEINFO_H 1
+
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SPURegisterInfo.h"
+
+namespace llvm {
+ class SPUFrameInfo: public TargetFrameInfo {
+ const TargetMachine &TM;
+ std::pair<unsigned, int> LR[1];
+
+ public:
+ SPUFrameInfo(const TargetMachine &tm);
+
+ //! Return a function's saved spill slots
+ /*!
+ For CellSPU, a function's saved spill slots consist of just the link register.
+ */
+ const std::pair<unsigned, int> *
+ getCalleeSaveSpillSlots(unsigned &NumEntries) const;
+
+ //! Stack slot size (16 bytes)
+ static int stackSlotSize() {
+ return 16;
+ }
+ //! Maximum frame offset representable by a signed 10-bit integer
+ /*!
+ This is the maximum frame offset that can be expressed as a 10-bit
+ integer, used in D-form addresses.
+ */
+ static int maxFrameOffset() {
+ return ((1 << 9) - 1) * stackSlotSize();
+ }
+ //! Minimum frame offset representable by a signed 10-bit integer
+ static int minFrameOffset() {
+ return -(1 << 9) * stackSlotSize();
+ }
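+ // Illustrative arithmetic (not from the original sources), given the
+ // 16-byte stack slots above:
+ //   maxFrameOffset() = ((1 << 9) - 1) * 16 =  8176 bytes
+ //   minFrameOffset() = -(1 << 9) * 16      = -8192 bytes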
+ //! Minimum frame size (enough to spill LR + SP)
+ static int minStackSize() {
+ return (2 * stackSlotSize());
+ }
+ //! Frame size required to spill all registers plus frame info
+ static int fullSpillSize() {
+ return (SPURegisterInfo::getNumArgRegs() * stackSlotSize());
+ }
+ //! Convert frame index to stack offset
+ static int FItoStackOffset(int frame_index) {
+ return frame_index * stackSlotSize();
+ }
+ //! Number of instructions required to overcome hint-for-branch latency
+ /*!
+ HBR (hint-for-branch) instructions can be inserted when, for example, we
+ know from the control flow graph that a given function, such as printf(),
+ is going to be called. HBRs are only inserted if a sufficient number
+ of instructions occur between the HBR and the target. Currently, HBRs
+ take 6 cycles, ergo, the magic number 6.
+ */
+ static int branchHintPenalty() {
+ return 6;
+ }
+ };
+}
+
+#endif
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
new file mode 100644
index 000000000000..caaa71a422fb
--- /dev/null
+++ b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
@@ -0,0 +1,138 @@
+//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on Cell SPU
+// processors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched"
+
+#include "SPUHazardRecognizers.h"
+#include "SPU.h"
+#include "SPUInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Cell SPU hazard recognizer
+//
+// This is the pipeline hazard recognizer for the Cell SPU processor. It does
+// very little right now.
+//===----------------------------------------------------------------------===//
+
+SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) :
+ TII(tii),
+ EvenOdd(0)
+{
+}
+
+/// Return the pipeline hazard type encountered or generated by this
+/// instruction. Currently returns NoHazard.
+///
+/// \return NoHazard
+ScheduleHazardRecognizer::HazardType
+SPUHazardRecognizer::getHazardType(SUnit *SU)
+{
+ // Initial thoughts on how to do this, but this code cannot work unless the
+ // function's prolog and epilog code are also being scheduled so that we can
+ // accurately determine which pipeline is being scheduled.
+#if 0
+ const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+ ScheduleHazardRecognizer::HazardType retval = NoHazard;
+ bool mustBeOdd = false;
+
+ switch (Node->getOpcode()) {
+ case SPU::LQDv16i8:
+ case SPU::LQDv8i16:
+ case SPU::LQDv4i32:
+ case SPU::LQDv4f32:
+ case SPU::LQDv2f64:
+ case SPU::LQDr128:
+ case SPU::LQDr64:
+ case SPU::LQDr32:
+ case SPU::LQDr16:
+ case SPU::LQAv16i8:
+ case SPU::LQAv8i16:
+ case SPU::LQAv4i32:
+ case SPU::LQAv4f32:
+ case SPU::LQAv2f64:
+ case SPU::LQAr128:
+ case SPU::LQAr64:
+ case SPU::LQAr32:
+ case SPU::LQXv4i32:
+ case SPU::LQXr128:
+ case SPU::LQXr64:
+ case SPU::LQXr32:
+ case SPU::LQXr16:
+ case SPU::STQDv16i8:
+ case SPU::STQDv8i16:
+ case SPU::STQDv4i32:
+ case SPU::STQDv4f32:
+ case SPU::STQDv2f64:
+ case SPU::STQDr128:
+ case SPU::STQDr64:
+ case SPU::STQDr32:
+ case SPU::STQDr16:
+ case SPU::STQDr8:
+ case SPU::STQAv16i8:
+ case SPU::STQAv8i16:
+ case SPU::STQAv4i32:
+ case SPU::STQAv4f32:
+ case SPU::STQAv2f64:
+ case SPU::STQAr128:
+ case SPU::STQAr64:
+ case SPU::STQAr32:
+ case SPU::STQAr16:
+ case SPU::STQAr8:
+ case SPU::STQXv16i8:
+ case SPU::STQXv8i16:
+ case SPU::STQXv4i32:
+ case SPU::STQXv4f32:
+ case SPU::STQXv2f64:
+ case SPU::STQXr128:
+ case SPU::STQXr64:
+ case SPU::STQXr32:
+ case SPU::STQXr16:
+ case SPU::STQXr8:
+ case SPU::RET:
+ mustBeOdd = true;
+ break;
+ default:
+ // Assume that this instruction can be on the even pipe
+ break;
+ }
+
+ if (mustBeOdd && !EvenOdd)
+ retval = Hazard;
+
+ DOUT << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard "
+      << retval << "\n";
+ EvenOdd ^= 1;
+ return retval;
+#else
+ return NoHazard;
+#endif
+}
+
+void SPUHazardRecognizer::EmitInstruction(SUnit *SU)
+{
+}
+
+void SPUHazardRecognizer::AdvanceCycle()
+{
+ DOUT << "SPUHazardRecognizer::AdvanceCycle\n";
+}
+
+void SPUHazardRecognizer::EmitNoop()
+{
+ AdvanceCycle();
+}
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h
new file mode 100644
index 000000000000..d0ae2d8e71c8
--- /dev/null
+++ b/lib/Target/CellSPU/SPUHazardRecognizers.h
@@ -0,0 +1,41 @@
+//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on the Cell SPU
+// processor.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUHAZRECS_H
+#define SPUHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+/// SPUHazardRecognizer
+class SPUHazardRecognizer : public ScheduleHazardRecognizer
+{
+private:
+ const TargetInstrInfo &TII;
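+  // Parity bit intended to track even/odd pipeline slots; only consulted by
+  // the (currently disabled) analysis in getHazardType().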
+ int EvenOdd;
+
+public:
+ SPUHazardRecognizer(const TargetInstrInfo &TII);
+ virtual HazardType getHazardType(SUnit *SU);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void EmitNoop();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
new file mode 100644
index 000000000000..779d75d0218a
--- /dev/null
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -0,0 +1,1244 @@
+//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for the Cell SPU,
+// converting from a legalized dag to a SPU-target dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "SPUISelLowering.h"
+#include "SPUHazardRecognizers.h"
+#include "SPUFrameInfo.h"
+#include "SPURegisterNames.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+  //! ConstantSDNode predicate for i64 sign-extended, 10-bit immediates
+ bool
+ isI64IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isS10Constant(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
+ bool
+ isI32IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isS10Constant(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values
+ bool
+ isI32IntU10Immediate(ConstantSDNode *CN)
+ {
+ return isU10Constant(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
+ bool
+ isI16IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isS10Constant(CN->getSExtValue());
+ }
+
+ //! SDNode predicate for i16 sign-extended, 10-bit immediate values
+ bool
+ isI16IntS10Immediate(SDNode *N)
+ {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ return (CN != 0 && isI16IntS10Immediate(CN));
+ }
+
+ //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
+ bool
+ isI16IntU10Immediate(ConstantSDNode *CN)
+ {
+ return isU10Constant((short) CN->getZExtValue());
+ }
+
+  //! SDNode predicate for i16 unsigned 10-bit immediate values
+ bool
+ isI16IntU10Immediate(SDNode *N)
+ {
+ return (N->getOpcode() == ISD::Constant
+ && isI16IntU10Immediate(cast<ConstantSDNode>(N)));
+ }
+
+ //! ConstantSDNode predicate for signed 16-bit values
+ /*!
+ \arg CN The constant SelectionDAG node holding the value
+ \arg Imm The returned 16-bit value, if returning true
+
+ This predicate tests the value in \a CN to see whether it can be
+ represented as a 16-bit, sign-extended quantity. Returns true if
+ this is the case.
+ */
+ bool
+ isIntS16Immediate(ConstantSDNode *CN, short &Imm)
+ {
+ MVT vt = CN->getValueType(0);
+ Imm = (short) CN->getZExtValue();
+ if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) {
+ return true;
+ } else if (vt == MVT::i32) {
+ int32_t i_val = (int32_t) CN->getZExtValue();
+ short s_val = (short) i_val;
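+      // True iff the value survives a round trip through 16-bit truncation
+      // (e.g. 0x7fff does, 0x8000 does not).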
+ return i_val == s_val;
+ } else {
+ int64_t i_val = (int64_t) CN->getZExtValue();
+ short s_val = (short) i_val;
+ return i_val == s_val;
+ }
+
+ return false;
+ }
+
+ //! SDNode predicate for signed 16-bit values.
+ bool
+ isIntS16Immediate(SDNode *N, short &Imm)
+ {
+ return (N->getOpcode() == ISD::Constant
+ && isIntS16Immediate(cast<ConstantSDNode>(N), Imm));
+ }
+
+ //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
+ static bool
+ isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
+ {
+ MVT vt = FPN->getValueType(0);
+ if (vt == MVT::f32) {
+ int val = FloatToBits(FPN->getValueAPF().convertToFloat());
+ int sval = (int) ((val << 16) >> 16);
+ Imm = (short) val;
+ return val == sval;
+ }
+
+ return false;
+ }
+
+ bool
+ isHighLow(const SDValue &Op)
+ {
+ return (Op.getOpcode() == SPUISD::IndirectAddr
+ && ((Op.getOperand(0).getOpcode() == SPUISD::Hi
+ && Op.getOperand(1).getOpcode() == SPUISD::Lo)
+ || (Op.getOperand(0).getOpcode() == SPUISD::Lo
+ && Op.getOperand(1).getOpcode() == SPUISD::Hi)));
+ }
+
+ //===------------------------------------------------------------------===//
+ //! MVT to "useful stuff" mapping structure:
+
+ struct valtype_map_s {
+ MVT VT;
+    unsigned ldresult_ins; ///< LDRESULT instruction (0 = undefined)
+    bool ldresult_imm;     ///< LDRESULT instruction requires immediate?
+    unsigned lrinst;       ///< LR instruction
+ };
+
+ const valtype_map_s valtype_map[] = {
+ { MVT::i8, SPU::ORBIr8, true, SPU::LRr8 },
+ { MVT::i16, SPU::ORHIr16, true, SPU::LRr16 },
+ { MVT::i32, SPU::ORIr32, true, SPU::LRr32 },
+ { MVT::i64, SPU::ORr64, false, SPU::LRr64 },
+ { MVT::f32, SPU::ORf32, false, SPU::LRf32 },
+ { MVT::f64, SPU::ORf64, false, SPU::LRf64 },
+ // vector types... (sigh!)
+ { MVT::v16i8, 0, false, SPU::LRv16i8 },
+ { MVT::v8i16, 0, false, SPU::LRv8i16 },
+ { MVT::v4i32, 0, false, SPU::LRv4i32 },
+ { MVT::v2i64, 0, false, SPU::LRv2i64 },
+ { MVT::v4f32, 0, false, SPU::LRv4f32 },
+ { MVT::v2f64, 0, false, SPU::LRv2f64 }
+ };
+
+ const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
+
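+  // Linear lookup into valtype_map. For example, MVT::i32 yields the entry
+  // { ORIr32, true, LRr32 }, while the vector types carry only an LR opcode
+  // (ldresult_ins == 0 means LDRESULT is undefined for that type).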
+ const valtype_map_s *getValueTypeMapEntry(MVT VT)
+ {
+ const valtype_map_s *retval = 0;
+ for (size_t i = 0; i < n_valtype_map; ++i) {
+ if (valtype_map[i].VT == VT) {
+ retval = valtype_map + i;
+ break;
+ }
+ }
+
+#ifndef NDEBUG
+ if (retval == 0) {
+ cerr << "SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns NULL for "
+ << VT.getMVTString()
+ << "\n";
+ abort();
+ }
+#endif
+
+ return retval;
+ }
+
+ //! Generate the carry-generate shuffle mask.
+ SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+    // Create the shuffle mask for "rotating" the carry up one register slot
+    // once the carry is generated.
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+ }
+
+ //! Generate the borrow-generate shuffle mask
+ SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+ // Create the shuffle mask for "rotating" the borrow up one register slot
+ // once the borrow is generated.
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+ }
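+  // In both masks above, selector words 0x04050607 and 0x0c0d0e0f copy bytes
+  // 4-7 and 12-15 of the first SHUFB operand (moving each doubleword's low
+  // word up one slot), while the special selectors 0x80808080 fill with zero
+  // bytes and 0xc0c0c0c0 fill with 0xff bytes, per the shufb selector
+  // encoding.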
+
+ //===------------------------------------------------------------------===//
+ /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class SPUDAGToDAGISel :
+ public SelectionDAGISel
+ {
+ SPUTargetMachine &TM;
+ SPUTargetLowering &SPUtli;
+ unsigned GlobalBaseReg;
+
+ public:
+ explicit SPUDAGToDAGISel(SPUTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm),
+ SPUtli(*tm.getTargetLowering())
+ { }
+
+ virtual bool runOnFunction(Function &Fn) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnFunction(Fn);
+ return true;
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(uint32_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
+ }
+
+ SDNode *emitBuildVector(SDValue build_vec) {
+ MVT vecVT = build_vec.getValueType();
+ MVT eltVT = vecVT.getVectorElementType();
+ SDNode *bvNode = build_vec.getNode();
+ DebugLoc dl = bvNode->getDebugLoc();
+
+ // Check to see if this vector can be represented as a CellSPU immediate
+ // constant by invoking all of the instruction selection predicates:
+ if (((vecVT == MVT::v8i16) &&
+ (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
+ ((vecVT == MVT::v4i32) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
+ ((vecVT == MVT::v2i64) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0))))
+ return Select(build_vec);
+
+ // No, need to emit a constant pool spill:
+ std::vector<Constant*> CV;
+
+ for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode > (build_vec.getOperand(i));
+ CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue()));
+ }
+
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue CGPoolOffset =
+ SPU::LowerConstantPool(CPIdx, *CurDAG,
+ SPUtli.getSPUTargetMachine());
+ return SelectCode(CurDAG->getLoad(build_vec.getValueType(), dl,
+ CurDAG->getEntryNode(), CGPoolOffset,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, Alignment));
+ }
+
+ /// Select - Convert the specified operand from a target-independent to a
+ /// target-specific node if it hasn't already been changed.
+ SDNode *Select(SDValue Op);
+
+ //! Emit the instruction sequence for i64 shl
+ SDNode *SelectSHLi64(SDValue &Op, MVT OpVT);
+
+ //! Emit the instruction sequence for i64 srl
+ SDNode *SelectSRLi64(SDValue &Op, MVT OpVT);
+
+ //! Emit the instruction sequence for i64 sra
+ SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
+
+ //! Emit the necessary sequence for loading i64 constants:
+ SDNode *SelectI64Constant(SDValue &Op, MVT OpVT, DebugLoc dl);
+
+ //! Alternate instruction emit sequence for loading i64 constants
+ SDNode *SelectI64Constant(uint64_t i64const, MVT OpVT, DebugLoc dl);
+
+ //! Returns true if the address N is an A-form (local store) address
+ bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ //! D-form address predicate
+ bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// Alternate D-form address using i7 offset predicate
+ bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
+ SDValue &Base);
+
+ /// D-form address selection workhorse
+ bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp,
+ SDValue &Base, int minOffset, int maxOffset);
+
+ //! Address predicate if N can be expressed as an indexed [r+r] operation.
+ bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectDFormAddr(Op, Op, Op0, Op1)
+ && !SelectAFormAddr(Op, Op, Op0, Op1))
+ SelectXFormAddr(Op, Op, Op0, Op1);
+ break;
+ case 'o': // offsetable
+ if (!SelectDFormAddr(Op, Op, Op0, Op1)
+ && !SelectAFormAddr(Op, Op, Op0, Op1)) {
+ Op0 = Op;
+ Op1 = getSmallIPtrImm(0);
+ }
+ break;
+ case 'v': // not offsetable
+#if 1
+ assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled.");
+#else
+ SelectAddrIdxOnly(Op, Op, Op0, Op1);
+#endif
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+ }
+
+ /// InstructionSelect - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "Cell SPU DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+ /// this target when scheduling the DAG.
+ virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() {
+ const TargetInstrInfo *II = TM.getInstrInfo();
+ assert(II && "No InstrInfo?");
+ return new SPUHazardRecognizer(*II);
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "SPUGenDAGISel.inc"
+ };
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void
+SPUDAGToDAGISel::InstructionSelect()
+{
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+/*!
+ \arg Op The ISD instruction operand
+ \arg N The address to be tested
+ \arg Base The base address
+ \arg Index The base address index
+ */
+bool
+SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ // These match the addr256k operand type:
+ MVT OffsVT = MVT::i16;
+ SDValue Zero = CurDAG->getTargetConstant(0, OffsVT);
+
+ switch (N.getOpcode()) {
+ case ISD::Constant:
+ case ISD::ConstantPool:
+ case ISD::GlobalAddress:
+ cerr << "SPU SelectAFormAddr: Constant/Pool/Global not lowered.\n";
+ abort();
+ /*NOTREACHED*/
+
+ case ISD::TargetConstant:
+ case ISD::TargetGlobalAddress:
+ case ISD::TargetJumpTable:
+ cerr << "SPUSelectAFormAddr: Target Constant/Pool/Global not wrapped as "
+ << "A-form address.\n";
+ abort();
+ /*NOTREACHED*/
+
+ case SPUISD::AFormAddr:
+ // Just load from memory if there's only a single use of the location,
+ // otherwise, this will get handled below with D-form offset addresses
+ if (N.hasOneUse()) {
+ SDValue Op0 = N.getOperand(0);
+ switch (Op0.getOpcode()) {
+ case ISD::TargetConstantPool:
+ case ISD::TargetJumpTable:
+ Base = Op0;
+ Index = Zero;
+ return true;
+
+ case ISD::TargetGlobalAddress: {
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0);
+ GlobalValue *GV = GSDN->getGlobal();
+ if (GV->getAlignment() == 16) {
+ Base = Op0;
+ Index = Zero;
+ return true;
+ }
+ break;
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
+
+bool
+SPUDAGToDAGISel::SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ const int minDForm2Offset = -(1 << 7);
+ const int maxDForm2Offset = (1 << 7) - 1;
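+  // i.e. displacements in the signed range [-128, 127].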
+ return DFormAddressPredicate(Op, N, Disp, Base, minDForm2Offset,
+ maxDForm2Offset);
+}
+
+/*!
+ \arg Op The ISD instruction (ignored)
+ \arg N The address to be tested
+ \arg Base Base address register/pointer
+ \arg Index Base address index
+
+  Examine whether the input address can be expressed as a base register plus
+  a signed 10-bit displacement, [r+I10] (D-form address).
+
+ \return true if \a N is a D-form address with \a Base and \a Index set
+ to non-empty SDValue instances.
+*/
+bool
+SPUDAGToDAGISel::SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ return DFormAddressPredicate(Op, N, Base, Index,
+ SPUFrameInfo::minFrameOffset(),
+ SPUFrameInfo::maxFrameOffset());
+}
+
+bool
+SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index, int minOffset,
+ int maxOffset) {
+ unsigned Opc = N.getOpcode();
+ MVT PtrTy = SPUtli.getPointerTy();
+
+ if (Opc == ISD::FrameIndex) {
+ // Stack frame index must be less than 512 (divided by 16):
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N);
+ int FI = int(FIN->getIndex());
+ DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = "
+ << FI << "\n");
+ if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (Opc == ISD::ADD) {
+ // Generated by getelementptr
+ const SDValue Op0 = N.getOperand(0);
+ const SDValue Op1 = N.getOperand(1);
+
+ if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo)
+ || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = N;
+ return true;
+ } else if (Op1.getOpcode() == ISD::Constant
+ || Op1.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1);
+ int32_t offset = int32_t(CN->getSExtValue());
+
+ if (Op0.getOpcode() == ISD::FrameIndex) {
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op0);
+ int FI = int(FIN->getIndex());
+ DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI << "\n");
+
+ if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (offset > minOffset && offset < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = Op0;
+ return true;
+ }
+ } else if (Op0.getOpcode() == ISD::Constant
+ || Op0.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0);
+ int32_t offset = int32_t(CN->getSExtValue());
+
+ if (Op1.getOpcode() == ISD::FrameIndex) {
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op1);
+ int FI = int(FIN->getIndex());
+ DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI << "\n");
+
+ if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (offset > minOffset && offset < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = Op1;
+ return true;
+ }
+ }
+ } else if (Opc == SPUISD::IndirectAddr) {
+ // Indirect with constant offset -> D-Form address
+ const SDValue Op0 = N.getOperand(0);
+ const SDValue Op1 = N.getOperand(1);
+
+ if (Op0.getOpcode() == SPUISD::Hi
+ && Op1.getOpcode() == SPUISD::Lo) {
+ // (SPUindirect (SPUhi <arg>, 0), (SPUlo <arg>, 0))
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = N;
+ return true;
+ } else if (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1)) {
+ int32_t offset = 0;
+ SDValue idxOp;
+
+ if (isa<ConstantSDNode>(Op1)) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ offset = int32_t(CN->getSExtValue());
+ idxOp = Op0;
+ } else if (isa<ConstantSDNode>(Op0)) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
+ offset = int32_t(CN->getSExtValue());
+ idxOp = Op1;
+ }
+
+ if (offset >= minOffset && offset <= maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = idxOp;
+ return true;
+ }
+ }
+ } else if (Opc == SPUISD::AFormAddr) {
+ Base = CurDAG->getTargetConstant(0, N.getValueType());
+ Index = N;
+ return true;
+ } else if (Opc == SPUISD::LDRESULT) {
+ Base = CurDAG->getTargetConstant(0, N.getValueType());
+ Index = N;
+ return true;
+ } else if (Opc == ISD::Register || Opc == ISD::CopyFromReg) {
+ unsigned OpOpc = Op.getOpcode();
+
+ if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
+ // Direct load/store without getelementptr
+ SDValue Addr, Offs;
+
+ // Get the register from CopyFromReg
+ if (Opc == ISD::CopyFromReg)
+ Addr = N.getOperand(1);
+ else
+ Addr = N; // Register
+
+ Offs = ((OpOpc == ISD::STORE) ? Op.getOperand(3) : Op.getOperand(2));
+
+ if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) {
+ if (Offs.getOpcode() == ISD::UNDEF)
+ Offs = CurDAG->getTargetConstant(0, Offs.getValueType());
+
+ Base = Offs;
+ Index = Addr;
+ return true;
+ }
+ } else {
+ /* If otherwise unadorned, default to D-form address with 0 offset: */
+ if (Opc == ISD::CopyFromReg) {
+ Index = N.getOperand(1);
+ } else {
+ Index = N;
+ }
+
+ Base = CurDAG->getTargetConstant(0, Index.getValueType());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*!
+ \arg Op The ISD instruction operand
+ \arg N The address operand
+ \arg Base The base pointer operand
+ \arg Index The offset/index operand
+
+ If the address \a N can be expressed as an A-form or D-form address, returns
+  false. Otherwise, creates two operands, Base and Index, that will become the
+ (r)(r) X-form address.
+*/
+bool
+SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ if (!SelectAFormAddr(Op, N, Base, Index)
+ && !SelectDFormAddr(Op, N, Base, Index)) {
+    // If the address is neither A-form nor D-form, punt and use an X-form
+ // address:
+ Base = N.getOperand(1);
+ Index = N.getOperand(0);
+ return true;
+ }
+
+ return false;
+}
+
+//! Convert the operand from a target-independent to a target-specific node
+/*!
+ */
+SDNode *
+SPUDAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ unsigned Opc = N->getOpcode();
+ int n_ops = -1;
+ unsigned NewOpc;
+ MVT OpVT = Op.getValueType();
+ SDValue Ops[8];
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isMachineOpcode()) {
+ return NULL; // Already selected.
+ }
+
+ if (Opc == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
+ SDValue Imm0 = CurDAG->getTargetConstant(0, Op.getValueType());
+
+ if (FI < 128) {
+ NewOpc = SPU::AIr32;
+ Ops[0] = TFI;
+ Ops[1] = Imm0;
+ n_ops = 2;
+ } else {
+ NewOpc = SPU::Ar32;
+ Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType());
+ Ops[1] = SDValue(CurDAG->getTargetNode(SPU::ILAr32, dl, Op.getValueType(),
+ TFI, Imm0), 0);
+ n_ops = 2;
+ }
+ } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
+ // Catch the i64 constants that end up here. Note: The backend doesn't
+ // attempt to legalize the constant (it's useless because DAGCombiner
+ // will insert 64-bit constants and we can't stop it).
+ return SelectI64Constant(Op, OpVT, Op.getDebugLoc());
+ } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
+ && OpVT == MVT::i64) {
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType();
+ MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
+ MVT OpVecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue shufMask;
+
+ switch (Op0VT.getSimpleVT()) {
+ default:
+ cerr << "CellSPU Select: Unhandled zero/any extend MVT\n";
+ abort();
+ /*NOTREACHED*/
+ break;
+ case MVT::i32:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x00010203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x08090a0b, MVT::i32));
+ break;
+
+ case MVT::i16:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800a0b, MVT::i32));
+ break;
+
+ case MVT::i8:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80808003, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x8080800b, MVT::i32));
+ break;
+ }
+
+ SDNode *shufMaskLoad = emitBuildVector(shufMask);
+ SDNode *PromoteScalar =
+ SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, Op0VecVT, Op0));
+
+ SDValue zextShuffle =
+ CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ SDValue(PromoteScalar, 0),
+ SDValue(PromoteScalar, 0),
+ SDValue(shufMaskLoad, 0));
+
+ // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we
+ // re-use it in the VEC2PREFSLOT selection without needing to explicitly
+ // call SelectCode (it's already done for us.)
+ SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle));
+ return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
+ zextShuffle));
+ } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
+
+ return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
+ Op.getOperand(0), Op.getOperand(1),
+ SDValue(CGLoad, 0)));
+ } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl));
+
+ return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
+ Op.getOperand(0), Op.getOperand(1),
+ SDValue(CGLoad, 0)));
+ } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
+
+ return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
+ Op.getOperand(0), Op.getOperand(1),
+ SDValue(CGLoad, 0)));
+ } else if (Opc == ISD::TRUNCATE) {
+ SDValue Op0 = Op.getOperand(0);
+ if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
+ && OpVT == MVT::i32
+ && Op0.getValueType() == MVT::i64) {
+      // Catch (truncate:i32 ([sra|srl]:i64 arg, c)), where c >= 32
+ //
+ // Take advantage of the fact that the upper 32 bits are in the
+ // i32 preferred slot and avoid shuffle gymnastics:
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ if (CN != 0) {
+ unsigned shift_amt = unsigned(CN->getZExtValue());
+
+ if (shift_amt >= 32) {
+ SDNode *hi32 =
+ CurDAG->getTargetNode(SPU::ORr32_r64, dl, OpVT,
+ Op0.getOperand(0));
+
+ shift_amt -= 32;
+ if (shift_amt > 0) {
+ // Take care of the additional shift, if present:
+ SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
+ unsigned Opc = SPU::ROTMAIr32_i32;
+
+ if (Op0.getOpcode() == ISD::SRL)
+ Opc = SPU::ROTMr32;
+
+ hi32 = CurDAG->getTargetNode(Opc, dl, OpVT, SDValue(hi32, 0),
+ shift);
+ }
+
+ return hi32;
+ }
+ }
+ }
+ } else if (Opc == ISD::SHL) {
+ if (OpVT == MVT::i64) {
+ return SelectSHLi64(Op, OpVT);
+ }
+ } else if (Opc == ISD::SRL) {
+ if (OpVT == MVT::i64) {
+ return SelectSRLi64(Op, OpVT);
+ }
+ } else if (Opc == ISD::SRA) {
+ if (OpVT == MVT::i64) {
+ return SelectSRAi64(Op, OpVT);
+ }
+ } else if (Opc == ISD::FNEG
+ && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Check if the pattern is a special form of DFNMS:
+ // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
+ SDValue Op0 = Op.getOperand(0);
+ if (Op0.getOpcode() == ISD::FSUB) {
+ SDValue Op00 = Op0.getOperand(0);
+ if (Op00.getOpcode() == ISD::FMUL) {
+ unsigned Opc = SPU::DFNMSf64;
+ if (OpVT == MVT::v2f64)
+ Opc = SPU::DFNMSv2f64;
+
+ return CurDAG->getTargetNode(Opc, dl, OpVT,
+ Op00.getOperand(0),
+ Op00.getOperand(1),
+ Op0.getOperand(1));
+ }
+ }
+
+ SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
+ SDNode *signMask = 0;
+ unsigned Opc = SPU::XORfneg64;
+
+ if (OpVT == MVT::f64) {
+ signMask = SelectI64Constant(negConst, MVT::i64, dl);
+ } else if (OpVT == MVT::v2f64) {
+ Opc = SPU::XORfnegvec;
+ signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v2i64,
+ negConst, negConst));
+ }
+
+ return CurDAG->getTargetNode(Opc, dl, OpVT,
+ Op.getOperand(0), SDValue(signMask, 0));
+ } else if (Opc == ISD::FABS) {
+ if (OpVT == MVT::f64) {
+ SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
+ return CurDAG->getTargetNode(SPU::ANDfabs64, dl, OpVT,
+ Op.getOperand(0), SDValue(signMask, 0));
+ } else if (OpVT == MVT::v2f64) {
+ SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
+ SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ absConst, absConst);
+ SDNode *signMask = emitBuildVector(absVec);
+ return CurDAG->getTargetNode(SPU::ANDfabsvec, dl, OpVT,
+ Op.getOperand(0), SDValue(signMask, 0));
+ }
+ } else if (Opc == SPUISD::LDRESULT) {
+ // Custom select instructions for LDRESULT
+ MVT VT = N->getValueType(0);
+ SDValue Arg = N->getOperand(0);
+ SDValue Chain = N->getOperand(1);
+ SDNode *Result;
+ const valtype_map_s *vtm = getValueTypeMapEntry(VT);
+
+ if (vtm->ldresult_ins == 0) {
+ cerr << "LDRESULT for unsupported type: "
+ << VT.getMVTString()
+ << "\n";
+ abort();
+ }
+
+ Opc = vtm->ldresult_ins;
+ if (vtm->ldresult_imm) {
+ SDValue Zero = CurDAG->getTargetConstant(0, VT);
+
+ Result = CurDAG->getTargetNode(Opc, dl, VT, MVT::Other, Arg, Zero, Chain);
+ } else {
+ Result = CurDAG->getTargetNode(Opc, dl, VT, MVT::Other, Arg, Arg, Chain);
+ }
+
+ return Result;
+ } else if (Opc == SPUISD::IndirectAddr) {
+ // Look at the operands: SelectCode() will catch the cases that aren't
+ // specifically handled here.
+ //
+ // SPUInstrInfo catches the following patterns:
+ // (SPUindirect (SPUhi ...), (SPUlo ...))
+ // (SPUindirect $sp, imm)
+ MVT VT = Op.getValueType();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ RegisterSDNode *RN;
+
+ if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
+ || (Op0.getOpcode() == ISD::Register
+ && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
+ && RN->getReg() != SPU::R1))) {
+ NewOpc = SPU::Ar32;
+ if (Op1.getOpcode() == ISD::Constant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
+ NewOpc = (isI32IntS10Immediate(CN) ? SPU::AIr32 : SPU::Ar32);
+ }
+ Ops[0] = Op0;
+ Ops[1] = Op1;
+ n_ops = 2;
+ }
+ }
+
+ if (n_ops > 0) {
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
+ else
+ return CurDAG->getTargetNode(NewOpc, dl, OpVT, Ops, n_ops);
+ } else
+ return SelectCode(Op);
+}
+
+/*!
+ * Emit the instruction sequence for i64 left shifts. The basic algorithm
+ * is to fill the bottom two word slots with zeros so that zeros are shifted
+ * in as the entire quadword is shifted left.
+ *
+ * \note This code could also be used to implement v2i64 shl.
+ *
+ * @param Op The shl operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) {
+ SDValue Op0 = Op.getOperand(0);
+ MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = Op.getOperand(1);
+ MVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
+ SDValue SelMaskVal;
+ DebugLoc dl = Op.getDebugLoc();
+
+ VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+ SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
+ SelMask = CurDAG->getTargetNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
+ ZeroFill = CurDAG->getTargetNode(SPU::ILv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0, OpVT));
+ VecOp0 = CurDAG->getTargetNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(ZeroFill, 0),
+ SDValue(VecOp0, 0),
+ SDValue(SelMask, 0));
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
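+      // e.g. a constant shift of 35 splits into bytes = 4 and bits = 3: shift
+      // the quadword left by 4 bytes (SHLQBYI), then by 3 more bits (SHLQBII).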
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getTargetNode(SPU::SHLQBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getTargetNode(SPU::SHLQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getTargetNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getTargetNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+ Shift =
+ CurDAG->getTargetNode(SPU::SHLQBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getTargetNode(SPU::SHLQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+}
+
+/*!
+ * Emit the instruction sequence for i64 logical right shifts.
+ *
+ * @param Op The srl operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) {
+ SDValue Op0 = Op.getOperand(0);
+ MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = Op.getOperand(1);
+ MVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *Shift = 0;
+ DebugLoc dl = Op.getDebugLoc();
+
+ VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op0);
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQMBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQMBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getTargetNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getTargetNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+
+ // Ensure that the shift amounts are negated!
+ Bytes = CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bytes, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Bits = CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bits, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQMBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQMBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+}
+
+/*!
+ * Emit the instruction sequence for i64 arithmetic right shifts.
+ *
+ * @param Op The sra operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) {
+ // Promote Op0 to vector
+ MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = Op.getOperand(1);
+ MVT ShiftAmtVT = ShiftAmt.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDNode *VecOp0 =
+ CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op.getOperand(0));
+
+ SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
+ SDNode *SignRot =
+ CurDAG->getTargetNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
+ SDValue(VecOp0, 0), SignRotAmt);
+ SDNode *UpperHalfSign =
+ CurDAG->getTargetNode(SPU::ORi32_v4i32, dl, MVT::i32, SDValue(SignRot, 0));
+
+ SDNode *UpperHalfSignMask =
+ CurDAG->getTargetNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
+ SDNode *UpperLowerMask =
+ CurDAG->getTargetNode(SPU::FSMBIv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
+ SDNode *UpperLowerSelect =
+ CurDAG->getTargetNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(UpperHalfSignMask, 0),
+ SDValue(VecOp0, 0),
+ SDValue(UpperLowerMask, 0));
+
+ SDNode *Shift = 0;
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ bytes = 31 - bytes;
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQBYIv2i64, dl, VecVT,
+ SDValue(UpperLowerSelect, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ bits = 8 - bits;
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *NegShift =
+ CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT,
+ ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT,
+ SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
+ Shift =
+ CurDAG->getTargetNode(SPU::ROTQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(NegShift, 0));
+ }
+
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0));
+}
+
+/*!
+  Do the necessary magic to load an i64 constant
+ */
+SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT,
+ DebugLoc dl) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
+ return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
+}
+
+SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT,
+ DebugLoc dl) {
+ MVT OpVecVT = MVT::getVectorVT(OpVT, 2);
+ SDValue i64vec =
+ SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
+
+ // Here's where it gets interesting, because we have to parse out the
+ // subtree handed back in i64vec:
+
+ if (i64vec.getOpcode() == ISD::BIT_CONVERT) {
+ // The degenerate case where the upper and lower bits in the splat are
+ // identical:
+ SDValue Op0 = i64vec.getOperand(0);
+
+ ReplaceUses(i64vec, Op0);
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
+ SDValue(emitBuildVector(Op0), 0));
+ } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
+ SDValue lhs = i64vec.getOperand(0);
+ SDValue rhs = i64vec.getOperand(1);
+ SDValue shufmask = i64vec.getOperand(2);
+
+ if (lhs.getOpcode() == ISD::BIT_CONVERT) {
+ ReplaceUses(lhs, lhs.getOperand(0));
+ lhs = lhs.getOperand(0);
+ }
+
+ SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
+ ? lhs.getNode()
+ : emitBuildVector(lhs));
+
+ if (rhs.getOpcode() == ISD::BIT_CONVERT) {
+ ReplaceUses(rhs, rhs.getOperand(0));
+ rhs = rhs.getOperand(0);
+ }
+
+ SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
+ ? rhs.getNode()
+ : emitBuildVector(rhs));
+
+ if (shufmask.getOpcode() == ISD::BIT_CONVERT) {
+ ReplaceUses(shufmask, shufmask.getOperand(0));
+ shufmask = shufmask.getOperand(0);
+ }
+
+ SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
+ ? shufmask.getNode()
+ : emitBuildVector(shufmask));
+
+ SDNode *shufNode =
+ Select(CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ SDValue(lhsNode, 0), SDValue(rhsNode, 0),
+ SDValue(shufMaskNode, 0)));
+
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
+ SDValue(shufNode, 0));
+ } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
+ SDValue(emitBuildVector(i64vec), 0));
+ } else {
+ cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n";
+ abort();
+ }
+}
+
+/// createSPUISelDag - This pass converts a legalized DAG into a
+/// SPU-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) {
+ return new SPUDAGToDAGISel(TM);
+}
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
new file mode 100644
index 000000000000..864a914bba78
--- /dev/null
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -0,0 +1,2980 @@
+//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPURegisterNames.h"
+#include "SPUISelLowering.h"
+#include "SPUTargetMachine.h"
+#include "SPUFrameInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+
+#include <map>
+
+using namespace llvm;
+
+// Used in getTargetNodeName() below
+namespace {
+ std::map<unsigned, const char *> node_names;
+
+ //! MVT mapping to useful data for Cell SPU
+ struct valtype_map_s {
+ const MVT valtype;
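+    // Byte offset of the type's preferred slot within the 16-byte register
+    // (see the table below: i1/i8 -> 3, i16 -> 2, i32 and wider -> 0).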
+ const int prefslot_byte;
+ };
+
+ const valtype_map_s valtype_map[] = {
+ { MVT::i1, 3 },
+ { MVT::i8, 3 },
+ { MVT::i16, 2 },
+ { MVT::i32, 0 },
+ { MVT::f32, 0 },
+ { MVT::i64, 0 },
+ { MVT::f64, 0 },
+ { MVT::i128, 0 }
+ };
+
+ const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
+
+ const valtype_map_s *getValueTypeMapEntry(MVT VT) {
+ const valtype_map_s *retval = 0;
+
+ for (size_t i = 0; i < n_valtype_map; ++i) {
+ if (valtype_map[i].valtype == VT) {
+ retval = valtype_map + i;
+ break;
+ }
+ }
+
+#ifndef NDEBUG
+ if (retval == 0) {
+ cerr << "getValueTypeMapEntry returns NULL for "
+ << VT.getMVTString()
+ << "\n";
+ abort();
+ }
+#endif
+
+ return retval;
+ }
+
+ //! Expand a library call into an actual call DAG node
+ /*!
+ \note
+ This code is taken from SelectionDAGLegalize, since it is not exposed as
+ part of the LLVM SelectionDAG API.
+ */
+
+ SDValue
+ ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
+ bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ MVT ArgVT = Op.getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForMVT();
+ Entry.Node = Op.getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ CallingConv::C, false, Callee, Args, DAG,
+ Op.getDebugLoc());
+
+ return CallInfo.first;
+ }
+}
+
+SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
+ : TargetLowering(TM),
+ SPUTM(TM)
+{
+ // Fold away setcc operations if possible.
+ setPow2DivIsCheap();
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // Set RTLIB libcall names as used by SPU:
+ setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
+
+ // Set up the SPU's register classes:
+ addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
+ addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
+ addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
+ addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
+ addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
+ addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
+ addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
+
+ // SPU has no sign or zero extended loads for i1, i8, i16:
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+
+ // SPU constant load actions are custom lowered:
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
+
+ // SPU's loads and stores have to be custom lowered:
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
+ ++sctype) {
+ MVT VT = (MVT::SimpleValueType)sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+ MVT StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
+ ++sctype) {
+ MVT VT = (MVT::SimpleValueType) sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+ MVT StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ // Expand the jumptable branches
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // Custom lower SELECT_CC for most cases, but expand by default
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+
+ // SPU has no intrinsics for these particular operations:
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ // SPU has no SREM/UREM instructions
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+
+ // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
+ // for f32!)
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // SPU can do rotate right and left, so legalize it... but customize for i8
+ // because instructions don't exist.
+
+ // FIXME: Change from "expand" to appropriate type once ROTR is supported in
+ // .td files.
+ setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTL, MVT::i16, Legal);
+ setOperationAction(ISD::ROTL, MVT::i8, Custom);
+
+ // SPU has no native version of shift left/right for i8
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+
+ // Make these operations legal and handle them during instruction selection:
+ setOperationAction(ISD::SHL, MVT::i64, Legal);
+ setOperationAction(ISD::SRL, MVT::i64, Legal);
+ setOperationAction(ISD::SRA, MVT::i64, Legal);
+
+  // Custom lower i8 multiplication; i32 and i64 multiplies are legal here
+ setOperationAction(ISD::MUL, MVT::i8, Custom);
+ setOperationAction(ISD::MUL, MVT::i32, Legal);
+ setOperationAction(ISD::MUL, MVT::i64, Legal);
+
+ // Need to custom handle (some) common i8, i64 math ops
+ setOperationAction(ISD::ADD, MVT::i8, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
+ setOperationAction(ISD::SUB, MVT::i8, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Legal);
+
+  // SPU does not have BSWAP. It does have i32 CTLZ support.
+ // CTPOP has to be custom lowered.
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+
+ setOperationAction(ISD::CTLZ , MVT::i32, Legal);
+
+ // SPU has a version of select that implements (a&~c)|(b&c), just like
+ // select ought to work:
+ setOperationAction(ISD::SELECT, MVT::i8, Legal);
+ setOperationAction(ISD::SELECT, MVT::i16, Legal);
+ setOperationAction(ISD::SELECT, MVT::i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::i64, Legal);
+
+ setOperationAction(ISD::SETCC, MVT::i8, Legal);
+ setOperationAction(ISD::SETCC, MVT::i16, Legal);
+ setOperationAction(ISD::SETCC, MVT::i32, Legal);
+ setOperationAction(ISD::SETCC, MVT::i64, Legal);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+
+ // Custom lower i128 -> i64 truncates
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
+
+  // SPU has a legal FP -> signed INT instruction for f32, but f64 needs to be
+  // expanded to a libcall, hence the custom lowering:
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+
+ // FDIV on SPU requires custom lowering
+ setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
+
+ // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+
+ setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
+ setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // Support label based line numbers.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
+ ++sctype) {
+ MVT VT = (MVT::SimpleValueType)sctype;
+
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ }
+
+ // RET must be custom lowered, to meet ABI requirements
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
+
+ // Cell SPU has instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+
+ // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
+
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
+
+ // "Odd size" vector classes that we're willing to support:
+ addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
+
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+    // mul is handled as a legal operation for these vector types.
+ setOperationAction(ISD::MUL, VT, Legal);
+
+ setOperationAction(ISD::AND, VT, Legal);
+ setOperationAction(ISD::OR, VT, Legal);
+ setOperationAction(ISD::XOR, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Legal);
+
+ // These operations need to be expanded:
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+
+ // Custom lower build_vector, constant pool spills, insert and
+ // extract vector elements:
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ }
+
+ setOperationAction(ISD::AND, MVT::v16i8, Custom);
+ setOperationAction(ISD::OR, MVT::v16i8, Custom);
+ setOperationAction(ISD::XOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+
+ setShiftAmountType(MVT::i32);
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+
+ setStackPointerRegisterToSaveRestore(SPU::R1);
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+
+ computeRegisterProperties();
+
+ // Set pre-RA register scheduler default to BURR, which produces slightly
+ // better code than the default (could also be TDRR, but TargetLowering.h
+ // needs a mod to support that model):
+ setSchedulingPreference(SchedulingForRegPressure);
+}
+
+const char *
+SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ if (node_names.empty()) {
+ node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
+ node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
+ node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
+ node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
+ node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
+ node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
+ node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
+ node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
+ node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
+ node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
+ node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
+ node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
+ node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
+ node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
+ node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
+ node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
+ node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
+ node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
+ node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
+ node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
+ node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
+ node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
+ "SPUISD::ROTBYTES_LEFT_BITS";
+ node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
+ node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
+ node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
+ node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
+ node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
+ }
+
+ std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
+
+ return ((i != node_names.end()) ? i->second : 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Return the Cell SPU's SETCC result type
+//===----------------------------------------------------------------------===//
+
+MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
+ // i8, i16 and i32 are valid SETCC result types
+ return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling convention code:
+//===----------------------------------------------------------------------===//
+
+#include "SPUGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+/// Custom lower loads for CellSPU
+/*!
+ All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to rotate to extract the requested element.
+
+ For extending loads, we also want to ensure that the following sequence is
+ emitted, e.g. for MVT::f32 extending load to MVT::f64:
+
+\verbatim
+%1 v16i8,ch = load
+%2 v16i8,ch = rotate %1
+%3 v4f32,ch = bitconvert %2
+%4 f32 = vec2prefslot %3
+%5 f64 = fp_extend %4
+\endverbatim
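+
+ For example (illustrative), an aligned i32 load at offset 6 within its
+ 16-byte block yields a rotate amount of (6 & 0xf) - 0 = 6 (the i32
+ preferred slot begins at byte 0), so ROTBYTES_LEFT by 6 moves bytes 6..9
+ of the quadword into bytes 0..3, where VEC2PREFSLOT extracts the scalar.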
+*/
+static SDValue
+LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ LoadSDNode *LN = cast<LoadSDNode>(Op);
+ SDValue the_chain = LN->getChain();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ MVT InVT = LN->getMemoryVT();
+ MVT OutVT = Op.getValueType();
+ ISD::LoadExtType ExtType = LN->getExtensionType();
+ unsigned alignment = LN->getAlignment();
+ const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
+ DebugLoc dl = Op.getDebugLoc();
+
+ switch (LN->getAddressingMode()) {
+ case ISD::UNINDEXED: {
+ SDValue result;
+ SDValue basePtr = LN->getBasePtr();
+ SDValue rotate;
+
+ if (alignment == 16) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and the rotation amount:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+ int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
+
+ if (rotamt < 0)
+ rotamt += 16;
+
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+ || (basePtr.getOpcode() == SPUISD::IndirectAddr
+ && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+ && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+ // Plain aligned a-form address: rotate into preferred slot
+ // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+ int64_t rotamt = -vtm->prefslot_byte;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+ } else {
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ int64_t rotamt = -vtm->prefslot_byte;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(rotamt, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(-vtm->prefslot_byte, PtrVT));
+ }
+
+ // Re-emit as a v16i8 vector load
+ result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
+ LN->getSrcValue(), LN->getSrcValueOffset(),
+ LN->isVolatile(), 16);
+
+ // Update the chain
+ the_chain = result.getValue(1);
+
+ // Rotate into the preferred slot:
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
+ result.getValue(0), rotate);
+
+ // Convert the loaded v16i8 vector to the appropriate vector type
+ // specified by the operand:
+ MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
+ DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
+
+ // Handle extending loads by extending the scalar result:
+ if (ExtType == ISD::SEXTLOAD) {
+ result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::EXTLOAD) {
+ unsigned NewOpc = ISD::ANY_EXTEND;
+
+ if (OutVT.isFloatingPoint())
+ NewOpc = ISD::FP_EXTEND;
+
+ result = DAG.getNode(NewOpc, dl, OutVT, result);
+ }
+
+ SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
+ SDValue retops[2] = {
+ result,
+ the_chain
+ };
+
+ result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
+ retops, sizeof(retops) / sizeof(retops[0]));
+ return result;
+ }
+ case ISD::PRE_INC:
+ case ISD::PRE_DEC:
+ case ISD::POST_INC:
+ case ISD::POST_DEC:
+ case ISD::LAST_INDEXED_MODE:
+ cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
+ "UNINDEXED\n";
+ cerr << (unsigned) LN->getAddressingMode() << "\n";
+ abort();
+ /*NOTREACHED*/
+ }
+
+ return SDValue();
+}
+
+/// Custom lower stores for CellSPU
+/*!
+ All CellSPU stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to generate a shuffle to insert the
+ requested element into its place, then store the resulting block.
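+
+ For an i32 store, the emitted sequence is roughly (illustrative sketch):
+
+\verbatim
+%1 v16i8,ch = load            ; fetch the containing quadword
+%2 v4i32 = scalar_to_vector   ; vectorize the value being stored
+%3 v4i32 = shuffle_mask       ; insertion control word
+%4 v4i32 = shufb %2, %1, %3   ; splice the value into the quadword
+%5 ch = store %4              ; write the quadword back
+\endverbatim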
+ */
+static SDValue
+LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ StoreSDNode *SN = cast<StoreSDNode>(Op);
+ SDValue Value = SN->getValue();
+ MVT VT = Value.getValueType();
+ MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned alignment = SN->getAlignment();
+
+ switch (SN->getAddressingMode()) {
+ case ISD::UNINDEXED: {
+ // The vector type we really want to load from the 16-byte chunk.
+ MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
+ stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
+
+ SDValue alignLoadVec;
+ SDValue basePtr = SN->getBasePtr();
+ SDValue the_chain = SN->getChain();
+ SDValue insertEltOffs;
+
+ if (alignment == 16) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and insertion byte:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & 0xf), PtrVT));
+
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else {
+ // Otherwise, assume it's at byte 0 of basePtr
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+ } else {
+ // Unaligned store: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Insertion point is solely determined by basePtr's contents
+ insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Re-emit as a v16i8 vector load
+ alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
+ SN->getSrcValue(), SN->getSrcValueOffset(),
+ SN->isVolatile(), 16);
+
+ // Update the chain
+ the_chain = alignLoadVec.getValue(1);
+
+ LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
+ SDValue theValue = SN->getValue();
+ SDValue result;
+
+ if (StVT != VT
+ && (theValue.getOpcode() == ISD::AssertZext
+ || theValue.getOpcode() == ISD::AssertSext)) {
+ // Drill down and get the value for zero- and sign-extended
+ // quantities
+ theValue = theValue.getOperand(0);
+ }
+
+ // If the base pointer is already a D-form address, then just create
+ // a new D-form address with a slot offset and the original base pointer.
+ // Otherwise generate a D-form address with the slot offset relative
+ // to the stack pointer, which is always aligned.
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "CellSPU LowerSTORE: basePtr = ";
+ basePtr.getNode()->dump(&DAG);
+ cerr << "\n";
+ }
+#endif
+
+ SDValue insertEltOp =
+ DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
+ SDValue vectorizeOp =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
+
+ result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
+ vectorizeOp, alignLoadVec,
+ DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::v4i32, insertEltOp));
+
+ result = DAG.getStore(the_chain, dl, result, basePtr,
+ LN->getSrcValue(), LN->getSrcValueOffset(),
+ LN->isVolatile(), LN->getAlignment());
+
+#if 0 && !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ const SDValue &currentRoot = DAG.getRoot();
+
+ DAG.setRoot(result);
+ cerr << "------- CellSPU:LowerStore result:\n";
+ DAG.dump();
+ cerr << "-------\n";
+ DAG.setRoot(currentRoot);
+ }
+#endif
+
+ return result;
+ /*UNREACHED*/
+ }
+ case ISD::PRE_INC:
+ case ISD::PRE_DEC:
+ case ISD::POST_INC:
+ case ISD::POST_DEC:
+ case ISD::LAST_INDEXED_MODE:
+ cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
+ "UNINDEXED\n";
+ cerr << (unsigned) SN->getAddressingMode() << "\n";
+ abort();
+ /*NOTREACHED*/
+ }
+
+ return SDValue();
+}
+
+//! Generate the address of a constant pool entry.
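+// In the common (static relocation, small-memory) case the constant-pool
+// address is simply wrapped in an AFormAddr node; when the subtarget reports
+// usingLargeMem(), it is instead split into SPUISD::Hi/Lo halves recombined
+// through an IndirectAddr (presumably selected as an ilhu/iohl-style pair).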
+SDValue
+LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ MVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ Constant *C = CP->getConstVal();
+ SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ // Just return the SDValue with the constant pool address in it.
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ }
+
+ assert(0 &&
+ "LowerConstantPool: Relocation model other than static"
+ " not supported.");
+ return SDValue();
+}
+
+//! Alternate entry point for generating the address of a constant pool entry
+SDValue
+SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
+ return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
+}
+
+static SDValue
+LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ MVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ }
+
+ assert(0 &&
+ "LowerJumpTable: Relocation model other than static not supported.");
+ return SDValue();
+}
+
+static SDValue
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ MVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
+ const TargetMachine &TM = DAG.getTarget();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ } else {
+ cerr << "LowerGlobalAddress: Relocation model other than static not "
+ << "supported.\n";
+ abort();
+ /*NOTREACHED*/
+ }
+
+ return SDValue();
+}
+
+//! Custom lower double precision floating point constants
+static SDValue
+LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (VT == MVT::f64) {
+ ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
+
+ assert((FP != 0) &&
+ "LowerConstantFP: Node is not ConstantFPSDNode");
+
+ uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
+ SDValue T = DAG.getConstant(dbits, MVT::i64);
+ SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
+ }
+
+ return SDValue();
+}
+
+static SDValue
+LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
+{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SmallVector<SDValue, 48> ArgValues;
+ SDValue Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ DebugLoc dl = Op.getDebugLoc();
+
+ const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
+ const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
+
+ unsigned ArgOffset = SPUFrameInfo::minStackSize();
+ unsigned ArgRegIdx = 0;
+ unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Add DAG nodes to load the arguments or copy them out of registers.
+ for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
+ ArgNo != e; ++ArgNo) {
+ MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ SDValue ArgVal;
+
+ if (ArgRegIdx < NumArgRegs) {
+ const TargetRegisterClass *ArgRegClass;
+
+ switch (ObjectVT.getSimpleVT()) {
+ default: {
+ cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+ << ObjectVT.getMVTString()
+ << "\n";
+ abort();
+ }
+ case MVT::i8:
+ ArgRegClass = &SPU::R8CRegClass;
+ break;
+ case MVT::i16:
+ ArgRegClass = &SPU::R16CRegClass;
+ break;
+ case MVT::i32:
+ ArgRegClass = &SPU::R32CRegClass;
+ break;
+ case MVT::i64:
+ ArgRegClass = &SPU::R64CRegClass;
+ break;
+ case MVT::i128:
+ ArgRegClass = &SPU::GPRCRegClass;
+ break;
+ case MVT::f32:
+ ArgRegClass = &SPU::R32FPRegClass;
+ break;
+ case MVT::f64:
+ ArgRegClass = &SPU::R64FPRegClass;
+ break;
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ ArgRegClass = &SPU::VECREGRegClass;
+ break;
+ }
+
+ unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
+ RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
+ ++ArgRegIdx;
+ } else {
+ // We need to load the argument to a virtual register if we determined
+ // above that we ran out of physical registers of the appropriate type
+ // or we're forced to do vararg
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+ ArgOffset += StackSlotSize;
+ }
+
+ ArgValues.push_back(ArgVal);
+ // Update the chain
+ Root = ArgVal.getOperand(0);
+ }
+
+ // vararg handling:
+ if (isVarArg) {
+ // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
+ // We will spill (79-3)+1 registers to the stack
+ SmallVector<SDValue, 79-3+1> MemOps;
+
+ // Create the frame slot
+
+ for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
+ VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
+ SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
+ SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
+ Root = Store.getOperand(0);
+ MemOps.push_back(Store);
+
+ // Increment address by stack slot size for the next stored argument
+ ArgOffset += StackSlotSize;
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size());
+}
+
+/// isLSAAddress - Return the immediate to use if the specified
+/// value is representable as an LSA address.
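+/// For example (illustrative), 0x10000 is word-aligned and survives the
+/// 18-bit sign-extension check below, so it encodes as the immediate 0x4000
+/// (0x10000 >> 2); 0x40000 fails the sign-extension check and is rejected.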
+static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return 0;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ (Addr << 14 >> 14) != Addr)
+ return 0; // Top 14 bits have to be sext of immediate.
+
+ return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
+}
+
+static SDValue
+LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ unsigned NumOps = TheCall->getNumArgs();
+ unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
+ const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
+ const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ // Handy pointer type
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Accumulate how many bytes are to be pushed on the stack, including the
+ // linkage area, and parameter passing area. According to the SPU ABI,
+ // we minimally need space for [LR] and [SP]
+ unsigned NumStackBytes = SPUFrameInfo::minStackSize();
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory.
+ unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
+ unsigned ArgRegIdx = 0;
+
+ // Keep track of registers passing arguments
+ std::vector<std::pair<unsigned, SDValue> > RegsToPass;
+ // And the arguments passed on the stack
+ SmallVector<SDValue, 8> MemOpChains;
+
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = TheCall->getArg(i);
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ switch (Arg.getValueType().getSimpleVT()) {
+ default: assert(0 && "Unexpected ValueType for argument!");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::i128:
+ if (ArgRegIdx != NumArgRegs) {
+ RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
+ } else {
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ ArgOffset += StackSlotSize;
+ }
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (ArgRegIdx != NumArgRegs) {
+ RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
+ } else {
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ ArgOffset += StackSlotSize;
+ }
+ break;
+ case MVT::v2i64:
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (ArgRegIdx != NumArgRegs) {
+ RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
+ } else {
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ ArgOffset += StackSlotSize;
+ }
+ break;
+ }
+ }
+
+ // Update number of stack bytes actually used, insert a call sequence start
+ NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
+ true));
+
+ if (!MemOpChains.empty()) {
+ // Adjust the stack pointer for the stack arguments.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = SPUISD::CALL;
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ GlobalValue *GV = G->getGlobal();
+ MVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
+
+ if (!ST->usingLargeMem()) {
+ // Turn calls to targets that are defined (i.e., have bodies) into BRSL
+ // style calls; declared-only external symbols become BRASL calls. This
+ // assumes that declared/defined symbols are in the same compilation unit
+ // and can be reached through PC-relative jumps.
+ //
+ // NOTE:
+ // This may be an unsafe assumption for JIT and really large compilation
+ // units.
+ if (GV->isDeclaration()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
+ }
+ } else {
+ // "Large memory" mode: Turn all calls into indirect calls with a X-form
+ // address pairs:
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ MVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
+ Callee.getValueType());
+
+ if (!ST->usingLargeMem()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
+ }
+ } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
+ // If this is an absolute destination address that appears to be a legal
+ // local store address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
+ &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (TheCall->getValueType(0) != MVT::Other)
+ InFlag = Chain.getValue(1);
+
+ SDValue ResultVals[3];
+ unsigned NumResults = 0;
+
+ // If the call has results, copy the values out of the ret val registers.
+ switch (TheCall->getValueType(0).getSimpleVT()) {
+ default: assert(0 && "Unexpected ret value!");
+ case MVT::Other: break;
+ case MVT::i32:
+ if (TheCall->getValueType(1) == MVT::i32) {
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
+ MVT::i32, InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
+ Chain.getValue(2)).getValue(1);
+ ResultVals[1] = Chain.getValue(0);
+ NumResults = 2;
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
+ InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ }
+ break;
+ case MVT::i64:
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
+ InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ break;
+ case MVT::i128:
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
+ InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
+ InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ break;
+ case MVT::v2f64:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
+ InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ break;
+ }
+
+ // If the function returns void, just return the chain.
+ if (NumResults == 0)
+ return Chain;
+
+ // Otherwise, merge everything together with a MERGE_VALUES node.
+ ResultVals[NumResults++] = Chain;
+ SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
+ return Res.getValue(Op.getResNo());
+}
+
+static SDValue
+LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+ CCState CCInfo(CC, isVarArg, TM, RVLocs);
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Op.getOperand(i*2+1), Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering:
+//===----------------------------------------------------------------------===//
+
+static ConstantSDNode *
+getVecImm(SDNode *N) {
+ SDValue OpVal(0, 0);
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return 0;
+ }
+
+ if (OpVal.getNode() != 0) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ return CN;
+ }
+ }
+
+ return 0;
+}
+
+/// get_vec_u18imm - Test if this vector is a vector filled with the same value
+/// and the value fits into an unsigned 18-bit constant, and if so, return the
+/// constant
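+/// For a v2i64 splat, the 64-bit value only qualifies when its two 32-bit
+/// halves are identical; e.g. (illustrative) a splat of 0x00003abc00003abcULL
+/// reduces to the u18 immediate 0x3abc, while 0x0000000100000002ULL is
+/// rejected because the halves differ.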
+SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ uint64_t Value = CN->getZExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (Value <= 0x3ffff)
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i16imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 16-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int64_t Value = CN->getSExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i10imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 10-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int64_t Value = CN->getSExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (isS10Constant(Value))
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i8imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 8-bit constant, and if so, return the
+/// constant.
+///
+/// @note: The incoming vector is v16i8 because that's the only way we can load
+/// constant vectors. Thus, we test to see if the upper and lower bytes are the
+/// same value.
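+///
+/// For example (illustrative), a 16-bit splat pattern of 0x5a5a passes the
+/// upper-byte/lower-byte comparison below and is returned as the i8 constant
+/// 0x5a; a pattern such as 0x125a is rejected.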
+SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int Value = (int) CN->getZExtValue();
+ if (ValueType == MVT::i16
+ && Value <= 0xffff /* truncated from uint64_t */
+ && ((short) Value >> 8) == ((short) Value & 0xff))
+ return DAG.getTargetConstant(Value & 0xff, ValueType);
+ else if (ValueType == MVT::i8
+ && (Value & 0xff) == Value)
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
+/// and the value fits into a signed 16-bit constant, and if so, return the
+/// constant
+SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ uint64_t Value = CN->getZExtValue();
+ if ((ValueType == MVT::i32
+ && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
+ || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
+ return DAG.getTargetConstant(Value >> 16, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
+SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
+ }
+
+ return SDValue();
+}
+
+/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
+SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
+ }
+
+ return SDValue();
+}
+
+//! Lower a BUILD_VECTOR instruction creatively:
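+// For example (illustrative), a v8i16 build_vector whose operands splat to
+// 0x1234 is re-emitted as eight i16 constants; a v16i8 splat is widened to a
+// v8i16 build_vector first (8-bit element immediates are expanded to 16 bits)
+// and then bit-converted back to v16i8.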
+SDValue
+LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ MVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
+ unsigned minSplatBits = EltVT.getSizeInBits();
+
+ if (minSplatBits < 16)
+ minSplatBits = 16;
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ || minSplatBits < SplatBitSize)
+ return SDValue(); // Wasn't a constant vector or splat exceeded min
+
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+
+ switch (VT.getSimpleVT()) {
+ default:
+ cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
+ << VT.getMVTString()
+ << "\n";
+ abort();
+ /*NOTREACHED*/
+ case MVT::v4f32: {
+ uint32_t Value32 = uint32_t(SplatBits);
+ assert(SplatBitSize == 32
+ && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
+ // NOTE: pretend the constant is an integer. LLVM won't load FP constants
+ SDValue T = DAG.getConstant(Value32, MVT::i32);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
+ break;
+ }
+ case MVT::v2f64: {
+ uint64_t f64val = uint64_t(SplatBits);
+ assert(SplatBitSize == 64
+ && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
+ // NOTE: pretend the constant is an integer. LLVM won't load FP constants
+ SDValue T = DAG.getConstant(f64val, MVT::i64);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
+ break;
+ }
+ case MVT::v16i8: {
+ // 8-bit constants have to be expanded to 16-bits
+ unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
+ }
+ case MVT::v8i16: {
+ unsigned short Value16 = SplatBits;
+ SDValue T = DAG.getConstant(Value16, EltVT);
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, T);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ }
+ case MVT::v4i32: {
+ SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
+ }
+ case MVT::v2i32: {
+ SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
+ }
+ case MVT::v2i64: {
+ return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
+ }
+ }
+
+ return SDValue();
+}
+
+/*!
+ Lower a splat of a 64-bit value into a v2i64 vector: replicate a 32-bit
+ pattern when the upper and lower words match, fall back to a constant
+ (eventually a constant-pool load) when both halves are "special" fill
+ patterns, and otherwise synthesize a SHUFB that interleaves two v4i32
+ splats of the upper and lower words.
+ */
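+// For example (illustrative), a v2i64 splat of 0x0000000100000002ULL has
+// differing, non-special halves, so two v4i32 splats (of 0x1 and 0x2) are
+// built and interleaved with a SHUFB whose control words alternate between
+// the "upper" and "lower" source quadwords.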
+SDValue
+SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
+ DebugLoc dl) {
+ uint32_t upper = uint32_t(SplatVal >> 32);
+ uint32_t lower = uint32_t(SplatVal);
+
+ if (upper == lower) {
+ // Magic constant that can be matched by IL, ILA, et al.
+ SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ Val, Val, Val, Val));
+ } else {
+ bool upper_special, lower_special;
+
+ // NOTE: This code creates common-case shuffle masks that can be easily
+ // detected as common expressions. It is not attempting to create highly
+ // specialized masks to replace any and all 0's, 0xff's and 0x80's.
+
+ // Detect if the upper or lower half is a special shuffle mask pattern:
+ upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
+ lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
+
+ // Both upper and lower are special, lower to a constant pool load:
+ if (lower_special && upper_special) {
+ SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ SplatValCN, SplatValCN);
+ }
+
+ SDValue LO32;
+ SDValue HI32;
+ SmallVector<SDValue, 16> ShufBytes;
+ SDValue Result;
+
+ // Create lower vector if not a special pattern
+ if (!lower_special) {
+ SDValue LO32C = DAG.getConstant(lower, MVT::i32);
+ LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ LO32C, LO32C, LO32C, LO32C));
+ }
+
+ // Create upper vector if not a special pattern
+ if (!upper_special) {
+ SDValue HI32C = DAG.getConstant(upper, MVT::i32);
+ HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ HI32C, HI32C, HI32C, HI32C));
+ }
+
+ // If either upper or lower are special, then the two input operands are
+ // the same (basically, one of them is a "don't care")
+ if (lower_special)
+ LO32 = HI32;
+ if (upper_special)
+ HI32 = LO32;
+
+ for (int i = 0; i < 4; ++i) {
+ uint64_t val = 0;
+ for (int j = 0; j < 4; ++j) {
+ SDValue V;
+ bool process_upper, process_lower;
+ val <<= 8;
+ process_upper = (upper_special && (i & 1) == 0);
+ process_lower = (lower_special && (i & 1) == 1);
+
+ if (process_upper || process_lower) {
+ if ((process_upper && upper == 0)
+ || (process_lower && lower == 0))
+ val |= 0x80;
+ else if ((process_upper && upper == 0xffffffff)
+ || (process_lower && lower == 0xffffffff))
+ val |= 0xc0;
+ else if ((process_upper && upper == 0x80000000)
+ || (process_lower && lower == 0x80000000))
+ val |= (j == 0 ? 0xe0 : 0x80);
+ } else
+ val |= i * 4 + j + ((i & 1) * 16);
+ }
+
+ ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
+ }
+
+ return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size()));
+ }
+}
+
+/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
+/// which the Cell can operate. The code inspects V3 to ascertain whether the
+/// permutation vector, V3, is monotonically increasing with one "exception"
+/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
+/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
+/// In either case, the net result is going to eventually invoke SHUFB to
+/// permute/shuffle the bytes from V1 and V2.
+/// \note
+/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
+/// generate the control word for byte/halfword/word insertion. This takes
+/// care of a single element move from V2 into V1.
+/// \note
+/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
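+/// \note
+/// For example (illustrative), for v4i32 the mask <0, 5, 2, 3> is monotonic
+/// with a single element taken from V2, so it is lowered to a SHUFFLE_MASK +
+/// SHUFB insertion; the mask <1, 2, 3, 0> is detected as a rotation and
+/// lowered to ROTBYTES_LEFT by 4 bytes; anything else falls back to an
+/// explicit v16i8 byte-permutation mask fed to SHUFB.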
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // If we have a single element being moved from V2 into V1, this can be
+ // handled using the C*[DX] compute-mask instructions, but the vector
+ // elements have to be monotonically increasing with one exception element.
+ MVT VecVT = V1.getValueType();
+ MVT EltVT = VecVT.getVectorElementType();
+ unsigned EltsFromV2 = 0;
+ unsigned V2Elt = 0;
+ unsigned V2EltIdx0 = 0;
+ unsigned CurrElt = 0;
+ unsigned MaxElts = VecVT.getVectorNumElements();
+ unsigned PrevElt = 0;
+ unsigned V0Elt = 0;
+ bool monotonic = true;
+ bool rotate = true;
+
+ if (EltVT == MVT::i8) {
+ V2EltIdx0 = 16;
+ } else if (EltVT == MVT::i16) {
+ V2EltIdx0 = 8;
+ } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
+ V2EltIdx0 = 4;
+ } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
+ V2EltIdx0 = 2;
+ } else
+ assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
+
+ for (unsigned i = 0; i != MaxElts; ++i) {
+ if (SVN->getMaskElt(i) < 0)
+ continue;
+
+ unsigned SrcElt = SVN->getMaskElt(i);
+
+ if (monotonic) {
+ if (SrcElt >= V2EltIdx0) {
+ if (1 >= (++EltsFromV2)) {
+ V2Elt = (V2EltIdx0 - SrcElt) << 2;
+ }
+ } else if (CurrElt != SrcElt) {
+ monotonic = false;
+ }
+
+ ++CurrElt;
+ }
+
+ if (rotate) {
+ if (PrevElt > 0 && SrcElt < MaxElts) {
+ if ((PrevElt == SrcElt - 1)
+ || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
+ PrevElt = SrcElt;
+ if (SrcElt == 0)
+ V0Elt = i;
+ } else {
+ rotate = false;
+ }
+ } else if (PrevElt == 0) {
+ // First time through, need to keep track of previous element
+ PrevElt = SrcElt;
+ } else {
+ // This isn't a rotation, takes elements from vector 2
+ rotate = false;
+ }
+ }
+ }
+
+ if (EltsFromV2 == 1 && monotonic) {
+ // Compute mask and shuffle
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Initialize temporary register to 0
+ SDValue InitTempReg =
+ DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
+ // Copy register's contents as index in SHUFFLE_MASK:
+ SDValue ShufMaskOp =
+ DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
+ DAG.getTargetConstant(V2Elt, MVT::i32),
+ DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
+ // Use shuffle mask in SHUFB synthetic instruction:
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
+ ShufMaskOp);
+ } else if (rotate) {
+ int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
+
+ return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
+ V1, DAG.getConstant(rotamt, MVT::i16));
+ } else {
+ // Convert the VECTOR_SHUFFLE mask's input element units to the
+ // actual bytes.
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = MaxElts; i != e; ++i) {
+ unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
+
+ for (unsigned j = 0; j < BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
+ }
+
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
+ }
+}
+
+static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op0.getNode()->getOpcode() == ISD::Constant) {
+ // For a constant, build the appropriate constant vector, which will
+ // eventually simplify to a vector register load.
+
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
+ SmallVector<SDValue, 16> ConstVecValues;
+ MVT VT;
+ size_t n_copies;
+
+ // Create a constant vector:
+ switch (Op.getValueType().getSimpleVT()) {
+ default: assert(0 && "Unexpected constant value type in "
+ "LowerSCALAR_TO_VECTOR");
+ case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
+ case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
+ case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
+ case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
+ case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
+ case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
+ }
+
+ SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
+ for (size_t j = 0; j < n_copies; ++j)
+ ConstVecValues.push_back(CValue);
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
+ &ConstVecValues[0], ConstVecValues.size());
+ } else {
+ // Otherwise, copy the value from one register to another:
+ switch (Op0.getValueType().getSimpleVT()) {
+ default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::f32:
+ case MVT::f64:
+ return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ SDValue N = Op.getOperand(0);
+ SDValue Elt = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue retval;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ // Constant argument:
+ int EltNo = (int) C->getZExtValue();
+
+ // sanity checks:
+ if (VT == MVT::i8 && EltNo >= 16)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
+ else if (VT == MVT::i16 && EltNo >= 8)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
+ else if (VT == MVT::i32 && EltNo >= 4)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
+ else if (VT == MVT::i64 && EltNo >= 2)
+ assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
+
+ if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
+ // i32 and i64: Element 0 is the preferred slot
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
+ }
+
+ // Need to generate shuffle mask and extract:
+ int prefslot_begin = -1, prefslot_end = -1;
+ int elt_byte = EltNo * VT.getSizeInBits() / 8;
+
+ switch (VT.getSimpleVT()) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: {
+ prefslot_begin = prefslot_end = 3;
+ break;
+ }
+ case MVT::i16: {
+ prefslot_begin = 2; prefslot_end = 3;
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ prefslot_begin = 0; prefslot_end = 3;
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ prefslot_begin = 0; prefslot_end = 7;
+ break;
+ }
+ }
+
+ assert(prefslot_begin != -1 && prefslot_end != -1 &&
+ "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
+
+ unsigned int ShufBytes[16];
+ for (int i = 0; i < 16; ++i) {
+ // zero fill upper part of the preferred slot, don't care about the
+ // other slots:
+ unsigned int mask_val;
+ if (i <= prefslot_end) {
+ mask_val =
+ ((i < prefslot_begin)
+ ? 0x80
+ : elt_byte + (i - prefslot_begin));
+
+ ShufBytes[i] = mask_val;
+ } else
+ ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
+ }
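+ // Worked example (illustrative): extracting i16 element 5 gives elt_byte =
+ // 10, and the i16 preferred slot spans bytes 2..3, so the pattern
+ // { 0x80, 0x80, 10, 11 } repeats across the mask: source bytes 10..11 land
+ // in the preferred slot and the remaining bytes are zero-filled.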
+
+ SDValue ShufMask[4];
+ for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
+ unsigned bidx = i * 4;
+ unsigned int bits = ((ShufBytes[bidx] << 24) |
+ (ShufBytes[bidx+1] << 16) |
+ (ShufBytes[bidx+2] << 8) |
+ ShufBytes[bidx+3]);
+ ShufMask[i] = DAG.getConstant(bits, MVT::i32);
+ }
+
+ SDValue ShufMaskVec =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
+
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
+ N, N, ShufMaskVec));
+ } else {
+ // Variable index: Rotate the requested element into slot 0, then replicate
+ // slot 0 across the vector
+ MVT VecVT = N.getValueType();
+ if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
+ cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
+ abort();
+ }
+
+ // Make life easier by making sure the index is zero-extended to i32
+ if (Elt.getValueType() != MVT::i32)
+ Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
+
+ // Scale the index to a bit/byte shift quantity
+ APInt scaleFactor =
+ APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
+ unsigned scaleShift = scaleFactor.logBase2();
+ SDValue vecShift;
+
+ if (scaleShift > 0) {
+ // Scale the shift factor:
+ Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
+ DAG.getConstant(scaleShift, MVT::i32));
+ }
+
+ vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
+
+ // Replicate the bytes starting at byte 0 across the entire vector (for
+ // consistency with the notion of a unified register set)
+ SDValue replicate;
+
+ switch (VT.getSimpleVT()) {
+ default:
+ cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
+ abort();
+ /*NOTREACHED*/
+ case MVT::i8: {
+ SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i16: {
+ SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
+ SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ loFactor, hiFactor, loFactor, hiFactor);
+ break;
+ }
+ }
+
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+ vecShift, vecShift, replicate));
+ }
+
+ return retval;
+}
+
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ SDValue VecOp = Op.getOperand(0);
+ SDValue ValOp = Op.getOperand(1);
+ SDValue IdxOp = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType();
+
+ ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
+ assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Use $sp ($1) because it's always 16-byte aligned and it's available:
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(CN->getSExtValue(), PtrVT));
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
+
+ SDValue result =
+ DAG.getNode(SPUISD::SHUFB, dl, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
+ VecOp,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
+
+ return result;
+}
+
+static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
+ const TargetLowering &TLI)
+{
+ SDValue N0 = Op.getOperand(0); // Everything has at least one operand
+ DebugLoc dl = Op.getDebugLoc();
+ MVT ShiftVT = TLI.getShiftAmountTy();
+
+ assert(Op.getValueType() == MVT::i8);
+ switch (Opc) {
+ default:
+ assert(0 && "Unhandled i8 math operator");
+ /*NOTREACHED*/
+ break;
+ case ISD::ADD: {
+ // 8-bit addition: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+
+ }
+
+ case ISD::SUB: {
+ // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::ROTR:
+ case ISD::ROTL: {
+ SDValue N1 = Op.getOperand(1);
+ MVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
+ ? ISD::ZERO_EXTEND
+ : ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ // Replicate lower 8-bits into upper 8:
+ SDValue ExpandArg =
+ DAG.getNode(ISD::OR, dl, MVT::i16, N0,
+ DAG.getNode(ISD::SHL, dl, MVT::i16,
+ N0, DAG.getConstant(8, MVT::i32)));
+
+ // Truncate back down to i8
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
+ }
+ case ISD::SRL:
+ case ISD::SHL: {
+ SDValue N1 = Op.getOperand(1);
+ MVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::ZERO_EXTEND;
+
+ if (N1.getValueType().bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::SRA: {
+ SDValue N1 = Op.getOperand(1);
+ MVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::SIGN_EXTEND;
+
+ if (N1VT.bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::MUL: {
+ SDValue N1 = Op.getOperand(1);
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
+//! Lower byte immediate operations for v16i8 vectors:
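+// For example (illustrative), in (and v16i8 %x, splat(0x0f)) the splatted
+// operand is re-emitted as sixteen i8 target constants so the operation can
+// be matched against the SPU byte-immediate forms (ANDBI/ORBI/XORBI-style
+// patterns).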
+static SDValue
+LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
+ SDValue ConstVec;
+ SDValue Arg;
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ ConstVec = Op.getOperand(0);
+ Arg = Op.getOperand(1);
+ if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
+ if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+ ConstVec = ConstVec.getOperand(0);
+ } else {
+ ConstVec = Op.getOperand(1);
+ Arg = Op.getOperand(0);
+ if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
+ ConstVec = ConstVec.getOperand(0);
+ }
+ }
+ }
+
+ if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
+
+ if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ && minSplatBits <= SplatBitSize) {
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+ SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
+
+ SmallVector<SDValue, 16> tcVec;
+ tcVec.assign(16, tc);
+ return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
+ }
+ }
+
+ // These operations (AND, OR, XOR) are legal, they just couldn't be custom
+ // lowered. Return the operation, rather than a null SDValue.
+ return Op;
+}
+
+//! Custom lowering for CTPOP (count population)
+/*!
+ Custom lowering code that counts the number of ones in the input
+ operand. SPU has such an instruction, but it counts the number of
+ ones per byte, which then have to be accumulated.
+*/
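+//
+// For example, for an i32 input the four per-byte counts produced by CNTB are
+// summed below as x += (x >> 16); x += (x >> 8); return x & 0xff; the i16
+// case needs only the single 8-bit shift.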
+static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+ DebugLoc dl = Op.getDebugLoc();
+
+ switch (VT.getSimpleVT()) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: {
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
+ }
+
+ case MVT::i16: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
+
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i16);
+ SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
+ SDValue Shift1 = DAG.getConstant(8, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ // CNTB_result becomes the chain to which the virtual register
+ // CNTB_reg becomes associated:
+ SDValue CNTB_result =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
+
+ SDValue CNTB_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
+ SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
+
+ return DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::ADD, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, dl, MVT::i16,
+ Tmp1, Shift1),
+ Tmp1),
+ Mask0);
+ }
+
+ case MVT::i32: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+ SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
+ SDValue Shift1 = DAG.getConstant(16, MVT::i32);
+ SDValue Shift2 = DAG.getConstant(8, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ // CNTB_result becomes the chain to which all of the virtual registers
+ // CNTB_reg, SUM1_reg become associated:
+ SDValue CNTB_result =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
+
+ SDValue CNTB_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
+ SDValue Comp1 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
+ Shift1);
+
+ SDValue Sum1 =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
+
+ SDValue Sum1_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
+
+ SDValue Comp2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
+ Shift2);
+ SDValue Sum2 =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
+
+ return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
+ }
+
+ case MVT::i64:
+ break;
+ }
+
+ return SDValue();
+}
+
+//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
+/*!
+ f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
+ All conversions to i64 are expanded to a libcall.
+ */
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ SPUTargetLowering &TLI) {
+ MVT OpVT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType();
+
+ if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
+ || OpVT == MVT::i64) {
+ // Convert f32 / f64 to i32 / i64 via libcall.
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::FP_TO_SINT)
+ ? RTLIB::getFPTOSINT(Op0VT, OpVT)
+ : RTLIB::getFPTOUINT(Op0VT, OpVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
+
+ return Op;
+}
+
+//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
+/*!
+ i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
+ All conversions from i64 are expanded to a libcall.
+ */
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ SPUTargetLowering &TLI) {
+ MVT OpVT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType();
+
+ if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
+ || Op0VT == MVT::i64) {
+ // Convert i32, i64 to f64 via libcall:
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::SINT_TO_FP)
+ ? RTLIB::getSINTTOFP(Op0VT, OpVT)
+ : RTLIB::getUINTTOFP(Op0VT, OpVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
+
+ return Op;
+}
+
+//! Lower ISD::SETCC
+/*!
+ This handles MVT::f64 (double floating point) condition lowering
+ */
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
+ DebugLoc dl = Op.getDebugLoc();
+ assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
+
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ MVT lhsVT = lhs.getValueType();
+ assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
+
+ MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+ APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ MVT IntVT(MVT::i64);
+
+ // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
+ // selected to a NOP:
+ SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
+ SDValue lhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRL, dl, IntVT,
+ i64lhs, DAG.getConstant(32, MVT::i32)));
+ SDValue lhsHi32abs =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
+ SDValue lhsLo32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
+
+ // SETO and SETUO only use the lhs operand:
+ if (CC->get() == ISD::SETO) {
+ // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
+ // SETUO
+ APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, dl, ccResultVT,
+ DAG.getSetCC(dl, ccResultVT,
+ lhs, DAG.getConstantFP(0.0, lhsVT),
+ ISD::SETUO),
+ DAG.getConstant(ccResultAllOnes, ccResultVT));
+ } else if (CC->get() == ISD::SETUO) {
+ // Evaluates to true if Op0 is [SQ]NaN
+ return DAG.getNode(ISD::AND, dl, ccResultVT,
+ DAG.getSetCC(dl, ccResultVT,
+ lhsHi32abs,
+ DAG.getConstant(0x7ff00000, MVT::i32),
+ ISD::SETGE),
+ DAG.getSetCC(dl, ccResultVT,
+ lhsLo32,
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETGT));
+ }
+
+ SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
+ SDValue rhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRL, dl, IntVT,
+ i64rhs, DAG.getConstant(32, MVT::i32)));
+
+ // If a value is negative, subtract from the sign magnitude constant:
+ SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
+
+ // Convert the sign-magnitude representation into 2's complement:
+ SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+ lhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
+ SDValue lhsSelect =
+ DAG.getNode(ISD::SELECT, dl, IntVT,
+ lhsSelectMask, lhsSignMag2TC, i64lhs);
+
+ SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+ rhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
+ SDValue rhsSelect =
+ DAG.getNode(ISD::SELECT, dl, IntVT,
+ rhsSelectMask, rhsSignMag2TC, i64rhs);
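+
+ // For negative inputs the IEEE bit pattern is sign|magnitude; subtracting it
+ // from 0x8000000000000000 yields -magnitude, so after the selects above both
+ // operands order under a signed integer compare the same way the original
+ // doubles do (for ordered values).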
+
+ unsigned compareOp;
+
+ switch (CC->get()) {
+ case ISD::SETOEQ:
+ case ISD::SETUEQ:
+ compareOp = ISD::SETEQ; break;
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ compareOp = ISD::SETGT; break;
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ compareOp = ISD::SETGE; break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ compareOp = ISD::SETLT; break;
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ compareOp = ISD::SETLE; break;
+ case ISD::SETUNE:
+ case ISD::SETONE:
+ compareOp = ISD::SETNE; break;
+ default:
+ cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
+ abort();
+ break;
+ }
+
+ SDValue result =
+ DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
+ (ISD::CondCode) compareOp);
+
+ if ((CC->get() & 0x8) == 0) {
+ // Ordered comparison:
+ SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
+ lhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
+ rhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
+
+ result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
+ }
+
+ return result;
+}
+
+//! Lower ISD::SELECT_CC
+/*!
+ ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
+ SELB instruction.
+
+ \note Need to revisit this in the future: if the code path through the true
+ and false value computations is longer than the latency of a branch (6
+ cycles), then it would be more advantageous to insert a new basic block and
+ branch on the condition. However, this code does not make that assumption,
+ given the simplistic uses so far.
+ */
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ MVT VT = Op.getValueType();
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ SDValue trueval = Op.getOperand(2);
+ SDValue falseval = Op.getOperand(3);
+ SDValue condition = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // NOTE: SELB's arguments: $rA, $rB, $mask
+ //
+ // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
+ // where bits in $mask are 1. CCond will be inverted, having 1s where the
+ // condition was true and 0s where the condition was false. Hence, the
+ // arguments to SELB get reversed.
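+ //
+ // Concretely, SELB computes (rA & ~mask) | (rB & mask), so passing
+ // (falseval, trueval, compare) selects trueval wherever the comparison
+ // produced all-ones and falseval elsewhere.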
+
+ // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
+ // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
+ // with another "cannot select select_cc" assert:
+
+ SDValue compare = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(Op.getValueType()),
+ lhs, rhs, condition);
+ return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
+}
+
+//! Custom lower ISD::TRUNCATE
+static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
+{
+ // Type to truncate to
+ MVT VT = Op.getValueType();
+ MVT::SimpleValueType simpleVT = VT.getSimpleVT();
+ MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Type to truncate from
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType();
+
+ if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
+ // Create shuffle mask, least significant doubleword of quadword
+ unsigned maskHigh = 0x08090a0b;
+ unsigned maskLow = 0x0c0d0e0f;
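+ // Each mask byte indexes into the concatenated (Op0, Op0) input, so indices
+ // 0x08-0x0f pick bytes 8-15 (the least significant doubleword) and the
+ // repeated pattern copies that doubleword into both halves of the result.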
+ // Use a shuffle to perform the truncation
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32),
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32));
+
+ SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+ Op0, Op0, shufMask);
+
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
+ }
+
+ return SDValue(); // Leave the truncate unmolested
+}
+
+//! Custom (target-specific) lowering entry point
+/*!
+ This is where LLVM's DAG selection process calls to do target-specific
+ lowering of nodes.
+ */
+SDValue
+SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
+{
+ unsigned Opc = (unsigned) Op.getOpcode();
+ MVT VT = Op.getValueType();
+
+ switch (Opc) {
+ default: {
+ cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+ cerr << "Op.getOpcode() = " << Opc << "\n";
+ cerr << "*Op.getNode():\n";
+ Op.getNode()->dump();
+ abort();
+ }
+ case ISD::LOAD:
+ case ISD::EXTLOAD:
+ case ISD::SEXTLOAD:
+ case ISD::ZEXTLOAD:
+ return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::STORE:
+ return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::ConstantPool:
+ return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::ConstantFP:
+ return LowerConstantFP(Op, DAG);
+ case ISD::FORMAL_ARGUMENTS:
+ return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
+ case ISD::CALL:
+ return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::RET:
+ return LowerRET(Op, DAG, getTargetMachine());
+
+ // i8, i64 math ops:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::ROTR:
+ case ISD::ROTL:
+ case ISD::SRL:
+ case ISD::SHL:
+ case ISD::SRA: {
+ if (VT == MVT::i8)
+ return LowerI8Math(Op, DAG, Opc, *this);
+ break;
+ }
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return LowerFP_TO_INT(Op, DAG, *this);
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return LowerINT_TO_FP(Op, DAG, *this);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR:
+ return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR:
+ return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return LowerINSERT_VECTOR_ELT(Op, DAG);
+
+ // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return LowerByteImmed(Op, DAG);
+
+ // Vector and i8 multiply:
+ case ISD::MUL:
+ if (VT == MVT::i8)
+ return LowerI8Math(Op, DAG, Opc, *this);
+
+ case ISD::CTPOP:
+ return LowerCTPOP(Op, DAG);
+
+ case ISD::SELECT_CC:
+ return LowerSELECT_CC(Op, DAG, *this);
+
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG, *this);
+
+ case ISD::TRUNCATE:
+ return LowerTRUNCATE(Op, DAG);
+ }
+
+ return SDValue();
+}
+
+void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG)
+{
+#if 0
+ unsigned Opc = (unsigned) N->getOpcode();
+ MVT OpVT = N->getValueType(0);
+
+ switch (Opc) {
+ default: {
+ cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+ cerr << "Op.getOpcode() = " << Opc << "\n";
+ cerr << "*Op.getNode():\n";
+ N->dump();
+ abort();
+ /*NOTREACHED*/
+ }
+ }
+#endif
+
+ /* Otherwise, return unchanged */
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
+{
+#if 0
+ TargetMachine &TM = getTargetMachine();
+#endif
+ const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op0 = N->getOperand(0); // everything has at least one operand
+ MVT NodeVT = N->getValueType(0); // The node's value type
+ MVT Op0VT = Op0.getValueType(); // The first operand's result
+ SDValue Result; // Initially, empty result
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD: {
+ SDValue Op1 = N->getOperand(1);
+
+ if (Op0.getOpcode() == SPUISD::IndirectAddr
+ || Op1.getOpcode() == SPUISD::IndirectAddr) {
+ // Normalize the operands to reduce repeated code
+ SDValue IndirectArg = Op0, AddArg = Op1;
+
+ if (Op1.getOpcode() == SPUISD::IndirectAddr) {
+ IndirectArg = Op1;
+ AddArg = Op0;
+ }
+
+ if (isa<ConstantSDNode>(AddArg)) {
+ ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
+ SDValue IndOp1 = IndirectArg.getOperand(1);
+
+ if (CN0->isNullValue()) {
+ // (add (SPUindirect <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\n"
+ << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return IndirectArg;
+ } else if (isa<ConstantSDNode>(IndOp1)) {
+ // (add (SPUindirect <arg>, <const>), <const>) ->
+ // (SPUindirect <arg>, <const + const>)
+ ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
+ int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
+ SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\n"
+ << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
+ << "), " << CN0->getSExtValue() << ")\n"
+ << "With: (SPUindirect <arg>, "
+ << combinedConst << ")\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ IndirectArg, combinedValue);
+ }
+ }
+ }
+ break;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: {
+ if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
+ // (any_extend (SPUextract_elt0 <arg>)) ->
+ // (SPUextract_elt0 <arg>)
+ // Types must match, however...
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\nReplace: ";
+ N->dump(&DAG);
+ cerr << "\nWith: ";
+ Op0.getNode()->dump(&DAG);
+ cerr << "\n";
+ }
+#endif
+
+ return Op0;
+ }
+ break;
+ }
+ case SPUISD::IndirectAddr: {
+ if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (CN != 0 && CN->getZExtValue() == 0) {
+ // (SPUindirect (SPUaform <addr>, 0), 0) ->
+ // (SPUaform <addr>, 0)
+
+ DEBUG(cerr << "Replace: ");
+ DEBUG(N->dump(&DAG));
+ DEBUG(cerr << "\nWith: ");
+ DEBUG(Op0.getNode()->dump(&DAG));
+ DEBUG(cerr << "\n");
+
+ return Op0;
+ }
+ } else if (Op0.getOpcode() == ISD::ADD) {
+ SDValue Op1 = N->getOperand(1);
+ if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
+ // (SPUindirect (add <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+ if (CN1->isNullValue()) {
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\n"
+ << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+ }
+ }
+ break;
+ }
+ case SPUISD::SHLQUAD_L_BITS:
+ case SPUISD::SHLQUAD_L_BYTES:
+ case SPUISD::VEC_SHL:
+ case SPUISD::VEC_SRL:
+ case SPUISD::VEC_SRA:
+ case SPUISD::ROTBYTES_LEFT: {
+ SDValue Op1 = N->getOperand(1);
+
+ // Kill degenerate vector shifts:
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
+ if (CN->isNullValue()) {
+ Result = Op0;
+ }
+ }
+ break;
+ }
+ case SPUISD::PREFSLOT2VEC: {
+ switch (Op0.getOpcode()) {
+ default:
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND: {
+ // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
+ // <arg>
+ // but only if the SPUprefslot2vec and <arg> types match.
+ SDValue Op00 = Op0.getOperand(0);
+ if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
+ SDValue Op000 = Op00.getOperand(0);
+ if (Op000.getValueType() == NodeVT) {
+ Result = Op000;
+ }
+ }
+ break;
+ }
+ case SPUISD::VEC2PREFSLOT: {
+ // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
+ // <arg>
+ Result = Op0.getOperand(0);
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+ // Otherwise, return unchanged.
+#ifndef NDEBUG
+ if (Result.getNode()) {
+ DEBUG(cerr << "\nReplace.SPU: ");
+ DEBUG(N->dump(&DAG));
+ DEBUG(cerr << "\nWith: ");
+ DEBUG(Result.getNode()->dump(&DAG));
+ DEBUG(cerr << "\n");
+ }
+#endif
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SPUTargetLowering::ConstraintType
+SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
+ if (ConstraintLetter.size() == 1) {
+ switch (ConstraintLetter[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(ConstraintLetter);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const
+{
+ if (Constraint.size() == 1) {
+ // GCC RS6000 Constraint Letters
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ case 'r': // R0-R31
+ if (VT == MVT::i64)
+ return std::make_pair(0U, SPU::R64CRegisterClass);
+ return std::make_pair(0U, SPU::R32CRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, SPU::R32FPRegisterClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, SPU::R64FPRegisterClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, SPU::GPRCRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//! Compute used/known bits for a SPU operand
+void
+SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth ) const {
+#if 0
+ const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
+
+ switch (Op.getOpcode()) {
+ default:
+ // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ break;
+ case CALL:
+ case SHUFB:
+ case SHUFFLE_MASK:
+ case CNTB:
+ case SPUISD::PREFSLOT2VEC:
+ case SPUISD::LDRESULT:
+ case SPUISD::VEC2PREFSLOT:
+ case SPUISD::SHLQUAD_L_BITS:
+ case SPUISD::SHLQUAD_L_BYTES:
+ case SPUISD::VEC_SHL:
+ case SPUISD::VEC_SRL:
+ case SPUISD::VEC_SRA:
+ case SPUISD::VEC_ROTL:
+ case SPUISD::VEC_ROTR:
+ case SPUISD::ROTBYTES_LEFT:
+ case SPUISD::SELECT_MASK:
+ case SPUISD::SELB:
+ }
+#endif
+}
+
+unsigned
+SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ default:
+ return 1;
+
+ case ISD::SETCC: {
+ MVT VT = Op.getValueType();
+
+ if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
+ VT = MVT::i32;
+ }
+ return VT.getSizeInBits();
+ }
+ }
+}
+
+// LowerAsmOperandForConstraint
+void
+SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ // Default, for the time being, to the base class handler
+ TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
+ Ops, DAG);
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode.
+bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
+ const Type *Ty) const {
+ // SPU's addresses are 256K:
+ return (V > -(1 << 18) && V < (1 << 18) - 1);
+}
+
+bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+ return false;
+}
+
+bool
+SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The SPU target isn't yet aware of offsets.
+ return false;
+}
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
new file mode 100644
index 000000000000..866c632d527a
--- /dev/null
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -0,0 +1,154 @@
+//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Cell SPU uses to lower LLVM code into
+// a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_ISELLOWERING_H
+#define SPU_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SPU.h"
+
+namespace llvm {
+ namespace SPUISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Pseudo instructions:
+ RET_FLAG, ///< Return with flag, matched by bi instruction
+
+ Hi, ///< High address component (upper 16)
+ Lo, ///< Low address component (lower 16)
+ PCRelAddr, ///< Program counter relative address
+ AFormAddr, ///< A-form address (local store)
+ IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)"
+
+ LDRESULT, ///< Load result (value, chain)
+ CALL, ///< CALL instruction
+ SHUFB, ///< Vector shuffle (permute)
+ SHUFFLE_MASK, ///< Shuffle mask
+ CNTB, ///< Count ones in bytes
+ PREFSLOT2VEC, ///< Promote scalar->vector
+ VEC2PREFSLOT, ///< Extract element 0
+ SHLQUAD_L_BITS, ///< Rotate quad left, by bits
+ SHLQUAD_L_BYTES, ///< Rotate quad left, by bytes
+ VEC_SHL, ///< Vector shift left
+ VEC_SRL, ///< Vector shift right (logical)
+ VEC_SRA, ///< Vector shift right (arithmetic)
+ VEC_ROTL, ///< Vector rotate left
+ VEC_ROTR, ///< Vector rotate right
+ ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
+ ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
+ SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
+ SELB, ///< Select bits -> (b & mask) | (a & ~mask)
+ // Markers: These aren't used to generate target-dependent nodes, but
+ // are used during instruction selection.
+ ADD64_MARKER, ///< i64 addition marker
+ SUB64_MARKER, ///< i64 subtraction marker
+ MUL64_MARKER, ///< i64 multiply marker
+ LAST_SPUISD ///< Last user-defined instruction
+ };
+ }
+
+ //! Utility functions specific to CellSPU:
+ namespace SPU {
+ SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType);
+ SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType);
+ SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType);
+ SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType);
+ SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+ MVT ValueType);
+ SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
+ SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
+
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetMachine &TM);
+ //! Simplify a MVT::v2i64 constant splat to CellSPU-ready form
+ SDValue LowerV2I64Splat(MVT OpVT, SelectionDAG &DAG, uint64_t splat,
+ DebugLoc dl);
+ }
+
+ class SPUTargetMachine; // forward dec'l.
+
+ class SPUTargetLowering :
+ public TargetLowering
+ {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ int ReturnAddrIndex; // FrameIndex for return slot.
+ SPUTargetMachine &SPUTM;
+
+ public:
+ //! The venerable constructor
+ /*!
+ This is where the CellSPU backend sets operation handling (i.e., legal,
+ custom, expand or promote.)
+ */
+ SPUTargetLowering(SPUTargetMachine &TM);
+
+ //! Get the target machine
+ SPUTargetMachine &getSPUTargetMachine() {
+ return SPUTM;
+ }
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the ValueType for ISD::SETCC
+ virtual MVT getSetCCResultType(MVT VT) const;
+
+ //! Custom lowering hooks
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ //! Custom lowering hook for nodes with illegal result types.
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG);
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth = 0) const;
+
+ ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// isLegalAddressImmediate - Return true if the integer value can be used
+ /// as the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+ virtual bool isLegalAddressImmediate(GlobalValue *) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ };
+}
+
+#endif
diff --git a/lib/Target/CellSPU/SPUInstrBuilder.h b/lib/Target/CellSPU/SPUInstrBuilder.h
new file mode 100644
index 000000000000..5e268f8767c2
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrBuilder.h
@@ -0,0 +1,43 @@
+//==-- SPUInstrBuilder.h - Aids for building Cell SPU insts ------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_INSTRBUILDER_H
+#define SPU_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference carries the FrameIndex in place of the base register until it is
+/// resolved. This allows a constant offset to be specified as well...
+///
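+/// A typical call site (as in SPUInstrInfo::storeRegToStackSlot) looks like:
+///
+///   addFrameReference(BuildMI(MBB, MI, DL, get(opc))
+///                       .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
+///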
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td
new file mode 100644
index 000000000000..21bc275209c6
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrFormats.td
@@ -0,0 +1,298 @@
+//==== SPUInstrFormats.td - Cell SPU Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Cell SPU instruction formats. Note that these are notationally similar to
+// PowerPC, like "A-Form". But the sizes of operands and fields differ.
+
+// This was kiped from the PPC instruction formats (seemed like a good idea...)
+
+class SPUInstr<dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "SPU";
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+}
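+
+// Every SPU instruction is a single 32-bit word; the format classes below
+// carve the Inst field into opcode, register and immediate slices.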
+
+// RR Format
+class RRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin> {
+ bits<7> RA;
+ bits<7> RB;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = opcode;
+ let Inst{11-17} = RB;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+let RB = 0 in {
+ // RR Format, where RB is zeroed (don't care):
+ class RRForm_1<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+
+ let RA = 0 in {
+ // RR Format, where RA and RB are zeroed (don't care):
+ // Used for reads from status control registers (see FPSCRRr32)
+ class RRForm_2<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+ }
+}
+
+let RT = 0 in {
+ // RR Format, where RT is zeroed (don't care), or as the instruction handbook
+ // says, "RT is a false target." Used in "Halt if" instructions
+ class RRForm_3<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RRR Format
+class RRRForm<bits<4> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> RA;
+ bits<7> RB;
+ bits<7> RC;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-3} = opcode;
+ let Inst{4-10} = RT;
+ let Inst{11-17} = RB;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RC;
+}
+
+// RI7 Format
+class RI7Form<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> i7;
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = opcode;
+ let Inst{11-17} = i7;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+// CVTIntFp Format
+class CVTIntFPForm<bits<10> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-9} = opcode;
+ let Inst{10-17} = 0;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+let RA = 0 in {
+ class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+
+ let RT = 0 in {
+ // Branch instruction format (without D/E flag settings)
+ class BRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+
+ class BIForm<bits<11> opcode, string asmstr, list<dag> pattern>
+ : RRForm<opcode, (outs), (ins R32C:$func), asmstr, BranchResolv,
+ pattern>
+ { }
+
+ let RB = 0 in {
+ // Return instruction (bi, branch indirect), RA is zero (LR):
+ class RETForm<string asmstr, list<dag> pattern>
+ : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv,
+ pattern>
+ { }
+ }
+ }
+}
+
+// Branch indirect external data forms:
+class BISLEDForm<bits<2> DE_flag, string asmstr, list<dag> pattern>
+ : SPUInstr<(outs), (ins indcalltarget:$func), asmstr, BranchResolv>
+{
+ bits<7> Rcalldest;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = 0b11010101100;
+ let Inst{11} = 0;
+ let Inst{12-13} = DE_flag;
+ let Inst{14-17} = 0b0000;
+ let Inst{18-24} = Rcalldest;
+ let Inst{25-31} = 0b0000000;
+}
+
+// RI10 Format
+class RI10Form<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<10> i10;
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-7} = opcode;
+ let Inst{8-17} = i10;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+// RI10 Format, where the constant is zero (or effectively ignored by the
+// SPU)
+let i10 = 0 in {
+ class RI10Form_1<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RI10 Format, where RT is ignored.
+// This format is used primarily by the Halt If ... Immediate set of
+// instructions
+let RT = 0 in {
+ class RI10Form_2<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RI16 Format
+class RI16Form<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<16> i16;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-8} = opcode;
+ let Inst{9-24} = i16;
+ let Inst{25-31} = RT;
+}
+
+// Specialized version of the RI16 Format for unconditional branch relative and
+// branch absolute, branch and set link. Note that for branch and set link, the
+// link register doesn't have to be $lr, but this is actually hard coded into
+// the instruction pattern.
+
+let RT = 0 in {
+ class UncondBranch<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+
+ class BranchSetLink<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+}
+
+//===----------------------------------------------------------------------===//
+// Specialized versions of RI16:
+//===----------------------------------------------------------------------===//
+
+// RI18 Format
+class RI18Form<bits<7> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<18> i18;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-6} = opcode;
+ let Inst{7-24} = i18;
+ let Inst{25-31} = RT;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction formats for intrinsics:
+//===----------------------------------------------------------------------===//
+
+// RI10 Format for v8i16 intrinsics
+class RI10_Int_v8i16<bits<8> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
+ !strconcat(opc, " $rT, $rA, $val"), itin,
+ [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))] >;
+
+class RI10_Int_v4i32<bits<8> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
+ !strconcat(opc, " $rT, $rA, $val"), itin,
+ [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+ i32ImmSExt10:$val))] >;
+
+// RR Format for v8i16 intrinsics
+class RR_Int_v8i16<bits<11> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ !strconcat(opc, " $rT, $rA, $rB"), itin,
+ [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))] >;
+
+// RR Format for v4i32 intrinsics
+class RR_Int_v4i32<bits<11> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ !strconcat(opc, " $rT, $rA, $rB"), itin,
+ [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+ (v4i32 VECREG:$rB)))] >;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions, like call frames:
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, NoItinerary> {
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
new file mode 100644
index 000000000000..4af995a78139
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -0,0 +1,693 @@
+//===- SPUInstrInfo.cpp - Cell SPU Instruction Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPURegisterNames.h"
+#include "SPUInstrInfo.h"
+#include "SPUInstrBuilder.h"
+#include "SPUTargetMachine.h"
+#include "SPUGenInstrInfo.inc"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace {
+ //! Predicate for an unconditional branch instruction
+ inline bool isUncondBranch(const MachineInstr *I) {
+ unsigned opc = I->getOpcode();
+
+ return (opc == SPU::BR
+ || opc == SPU::BRA
+ || opc == SPU::BI);
+ }
+
+ //! Predicate for a conditional branch instruction
+ inline bool isCondBranch(const MachineInstr *I) {
+ unsigned opc = I->getOpcode();
+
+ return (opc == SPU::BRNZr32
+ || opc == SPU::BRNZv4i32
+ || opc == SPU::BRZr32
+ || opc == SPU::BRZv4i32
+ || opc == SPU::BRHNZr16
+ || opc == SPU::BRHNZv8i16
+ || opc == SPU::BRHZr16
+ || opc == SPU::BRHZv8i16);
+ }
+}
+
+SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
+ : TargetInstrInfoImpl(SPUInsts, sizeof(SPUInsts)/sizeof(SPUInsts[0])),
+ TM(tm),
+ RI(*TM.getSubtargetImpl(), *this)
+{ /* NOP */ }
+
+bool
+SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg,
+ unsigned& destReg,
+ unsigned& SrcSR, unsigned& DstSR) const {
+ SrcSR = DstSR = 0; // No sub-registers.
+
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case SPU::ORIv4i32:
+ case SPU::ORIr32:
+ case SPU::ORHIv8i16:
+ case SPU::ORHIr16:
+ case SPU::ORHIi8i16:
+ case SPU::ORBIv16i8:
+ case SPU::ORBIr8:
+ case SPU::ORIi16i32:
+ case SPU::ORIi8i32:
+ case SPU::AHIvec:
+ case SPU::AHIr16:
+ case SPU::AIv4i32:
+ assert(MI.getNumOperands() == 3 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isImm() &&
+ "invalid SPU ORI/ORHI/ORBI/AHI/AI/SFI/SFHI instruction!");
+ if (MI.getOperand(2).getImm() == 0) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ break;
+ case SPU::AIr32:
+ assert(MI.getNumOperands() == 3 &&
+ "wrong number of operands to AIr32");
+ if (MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ (MI.getOperand(2).isImm() &&
+ MI.getOperand(2).getImm() == 0)) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ break;
+ case SPU::LRr8:
+ case SPU::LRr16:
+ case SPU::LRr32:
+ case SPU::LRf32:
+ case SPU::LRr64:
+ case SPU::LRf64:
+ case SPU::LRr128:
+ case SPU::LRv16i8:
+ case SPU::LRv8i16:
+ case SPU::LRv4i32:
+ case SPU::LRv4f32:
+ case SPU::LRv2i64:
+ case SPU::LRv2f64:
+ case SPU::ORv16i8_i8:
+ case SPU::ORv8i16_i16:
+ case SPU::ORv4i32_i32:
+ case SPU::ORv2i64_i64:
+ case SPU::ORv4f32_f32:
+ case SPU::ORv2f64_f64:
+ case SPU::ORi8_v16i8:
+ case SPU::ORi16_v8i16:
+ case SPU::ORi32_v4i32:
+ case SPU::ORi64_v2i64:
+ case SPU::ORf32_v4f32:
+ case SPU::ORf64_v2f64:
+/*
+ case SPU::ORi128_r64:
+ case SPU::ORi128_f64:
+ case SPU::ORi128_r32:
+ case SPU::ORi128_f32:
+ case SPU::ORi128_r16:
+ case SPU::ORi128_r8:
+*/
+ case SPU::ORi128_vec:
+/*
+ case SPU::ORr64_i128:
+ case SPU::ORf64_i128:
+ case SPU::ORr32_i128:
+ case SPU::ORf32_i128:
+ case SPU::ORr16_i128:
+ case SPU::ORr8_i128:
+*/
+ case SPU::ORvec_i128:
+/*
+ case SPU::ORr16_r32:
+ case SPU::ORr8_r32:
+ case SPU::ORf32_r32:
+ case SPU::ORr32_f32:
+ case SPU::ORr32_r16:
+ case SPU::ORr32_r8:
+ case SPU::ORr16_r64:
+ case SPU::ORr8_r64:
+ case SPU::ORr64_r16:
+ case SPU::ORr64_r8:
+*/
+ case SPU::ORr64_r32:
+ case SPU::ORr32_r64:
+ case SPU::ORf32_r32:
+ case SPU::ORr32_f32:
+ case SPU::ORf64_r64:
+ case SPU::ORr64_f64: {
+ assert(MI.getNumOperands() == 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "invalid SPU OR<type>_<vec> or LR instruction!");
+ if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ break;
+ }
+ case SPU::ORv16i8:
+ case SPU::ORv8i16:
+ case SPU::ORv4i32:
+ case SPU::ORv2i64:
+ case SPU::ORr8:
+ case SPU::ORr16:
+ case SPU::ORr32:
+ case SPU::ORr64:
+ case SPU::ORr128:
+ case SPU::ORf32:
+ case SPU::ORf64:
+ assert(MI.getNumOperands() == 3 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isReg() &&
+ "invalid SPU OR(vec|r32|r64|gprc) instruction!");
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ break;
+ }
+
+ return false;
+}
+
+unsigned
+SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SPU::LQDv16i8:
+ case SPU::LQDv8i16:
+ case SPU::LQDv4i32:
+ case SPU::LQDv4f32:
+ case SPU::LQDv2f64:
+ case SPU::LQDr128:
+ case SPU::LQDr64:
+ case SPU::LQDr32:
+ case SPU::LQDr16: {
+ const MachineOperand MOp1 = MI->getOperand(1);
+ const MachineOperand MOp2 = MI->getOperand(2);
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+unsigned
+SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SPU::STQDv16i8:
+ case SPU::STQDv8i16:
+ case SPU::STQDv4i32:
+ case SPU::STQDv4f32:
+ case SPU::STQDv2f64:
+ case SPU::STQDr128:
+ case SPU::STQDr64:
+ case SPU::STQDr32:
+ case SPU::STQDr16:
+ case SPU::STQDr8: {
+ const MachineOperand MOp1 = MI->getOperand(1);
+ const MachineOperand MOp2 = MI->getOperand(2);
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const
+{
+ // We support cross register class moves for our aliases, such as R3 in any
+ // reg class to any other reg class containing R3. This is required because
+ // we instruction select bitconvert i64 -> f64 as a noop for example, so our
+ // types have no specific meaning.
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (DestRC == SPU::R8CRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRr8), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::R16CRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRr16), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::R32CRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRr32), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::R32FPRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRf32), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::R64CRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRr64), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::R64FPRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRf64), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::GPRCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRr128), DestReg).addReg(SrcReg);
+ } else if (DestRC == SPU::VECREGRegisterClass) {
+ BuildMI(MBB, MI, DL, get(SPU::LRv16i8), DestReg).addReg(SrcReg);
+ } else {
+ // Attempt to copy unknown/unsupported register class!
+ return false;
+ }
+
+ return true;
+}
+
+void
+SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC) const
+{
+ unsigned opc;
+ bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
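+ // D-form stores (STQD*) encode the offset as an immediate (see RI10Form in
+ // SPUInstrFormats.td), so frame indices past maxFrameOffset() fall back to
+ // the register-indexed X-form (STQX*) variants.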
+ if (RC == SPU::GPRCRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128);
+ } else if (RC == SPU::R64CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
+ } else if (RC == SPU::R64FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
+ } else if (RC == SPU::R32CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
+ } else if (RC == SPU::R32FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
+ } else if (RC == SPU::R16CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16);
+ } else if (RC == SPU::R8CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8;
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(opc))
+ .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
+}
+
+void SPUInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ cerr << "storeRegToAddr() invoked!\n";
+ abort();
+
+ if (Addr[0].isFI()) {
+ /* do what storeRegToStackSlot does here */
+ } else {
+ unsigned Opc = 0;
+ if (RC == SPU::GPRCRegisterClass) {
+ /* Opc = PPC::STW; */
+ } else if (RC == SPU::R16CRegisterClass) {
+ /* Opc = PPC::STD; */
+ } else if (RC == SPU::R32CRegisterClass) {
+ /* Opc = PPC::STFD; */
+ } else if (RC == SPU::R32FPRegisterClass) {
+ /* Opc = PPC::STFD; */
+ } else if (RC == SPU::R64FPRegisterClass) {
+ /* Opc = PPC::STFS; */
+ } else if (RC == SPU::VECREGRegisterClass) {
+ /* Opc = PPC::STVX; */
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc))
+ .addReg(SrcReg, getKillRegState(isKill));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ }
+}
+
+void
+SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const
+{
+ unsigned opc;
+ bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
+ if (RC == SPU::GPRCRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128);
+ } else if (RC == SPU::R64CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
+ } else if (RC == SPU::R64FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
+ } else if (RC == SPU::R32CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
+ } else if (RC == SPU::R32FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
+ } else if (RC == SPU::R16CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16);
+ } else if (RC == SPU::R8CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8;
+ } else {
+ assert(0 && "Unknown regclass in loadRegFromStackSlot!");
+ abort();
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx);
+}
+
+/*!
+ \note We are really pessimistic here about what kind of a load we're doing.
+ */
+void SPUInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs)
+ const {
+ cerr << "loadRegToAddr() invoked!\n";
+ abort();
+
+ if (Addr[0].isFI()) {
+ /* do what loadRegFromStackSlot does here... */
+ } else {
+ unsigned Opc = 0;
+ if (RC == SPU::R8CRegisterClass) {
+ /* do brilliance here */
+ } else if (RC == SPU::R16CRegisterClass) {
+ /* Opc = PPC::LWZ; */
+ } else if (RC == SPU::R32CRegisterClass) {
+ /* Opc = PPC::LD; */
+ } else if (RC == SPU::R32FPRegisterClass) {
+ /* Opc = PPC::LFD; */
+ } else if (RC == SPU::R64FPRegisterClass) {
+ /* Opc = PPC::LFS; */
+ } else if (RC == SPU::VECREGRegisterClass) {
+ /* Opc = PPC::LVX; */
+ } else if (RC == SPU::GPRCRegisterClass) {
+ /* Opc = something else! */
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ }
+}
+
+//! Return true if the specified load or store can be folded
+bool
+SPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ if (Ops.size() != 1) return false;
+
+ // Make sure this is a reg-reg copy.
+ unsigned Opc = MI->getOpcode();
+
+ switch (Opc) {
+ case SPU::ORv16i8:
+ case SPU::ORv8i16:
+ case SPU::ORv4i32:
+ case SPU::ORv2i64:
+ case SPU::ORr8:
+ case SPU::ORr16:
+ case SPU::ORr32:
+ case SPU::ORr64:
+ case SPU::ORf32:
+ case SPU::ORf64:
+ if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg())
+ return true;
+ break;
+ }
+
+ return false;
+}
+
+/// foldMemoryOperand - SPU, like PPC, can only fold spills into
+/// copy instructions, turning them into load/store instructions.
+MachineInstr *
+SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const
+{
+ if (Ops.size() != 1) return 0;
+
+ unsigned OpNum = Ops[0];
+ unsigned Opc = MI->getOpcode();
+ MachineInstr *NewMI = 0;
+
+ switch (Opc) {
+ case SPU::ORv16i8:
+ case SPU::ORv8i16:
+ case SPU::ORv4i32:
+ case SPU::ORv2i64:
+ case SPU::ORr8:
+ case SPU::ORr16:
+ case SPU::ORr32:
+ case SPU::ORr64:
+ case SPU::ORf32:
+ case SPU::ORf64:
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ if (FrameIndex < SPUFrameInfo::maxFrameOffset()) {
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(),
+ get(SPU::STQDr32));
+
+ MIB.addReg(InReg, getKillRegState(isKill));
+ NewMI = addFrameReference(MIB, FrameIndex);
+ }
+ } else { // move -> load
+ unsigned OutReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc));
+
+ MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead));
+ Opc = (FrameIndex < SPUFrameInfo::maxFrameOffset())
+ ? SPU::STQDr32 : SPU::STQXr32;
+ NewMI = addFrameReference(MIB, FrameIndex);
+ break;
+ }
+ }
+
+ return NewMI;
+}
+
+//! Branch analysis
+/*!
+ \note This code was kiped from PPC. There may be more branch analysis for
+ CellSPU than what's currently done here.
+ */
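+//
+// The condition produced here is a pair of MachineOperands: Cond[0] holds the
+// conditional branch opcode as an immediate and Cond[1] holds the register
+// being tested; InsertBranch and ReverseBranchCondition rely on the same
+// encoding.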
+bool
+SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranch(LastInst)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (isCondBranch(LastInst)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ DEBUG(cerr << "Pushing LastInst: ");
+ DEBUG(LastInst->dump());
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a conditional and unconditional branch, handle it.
+ if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ DEBUG(cerr << "Pushing SecondLastInst: ");
+ DEBUG(SecondLastInst->dump());
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned
+SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ if (!isCondBranch(I) && !isUncondBranch(I))
+ return 0;
+
+ // Remove the first branch.
+ DEBUG(cerr << "Removing branch: ");
+ DEBUG(I->dump());
+ I->eraseFromParent();
+ I = MBB.end();
+ if (I == MBB.begin())
+ return 1;
+
+ --I;
+ if (!(isCondBranch(I) || isUncondBranch(I)))
+ return 1;
+
+ // Remove the second branch.
+ DEBUG(cerr << "Removing second branch: ");
+ DEBUG(I->dump());
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME this should probably have a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "SPU branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ // Unconditional branch
+ MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(SPU::BR));
+ MIB.addMBB(TBB);
+
+ DEBUG(cerr << "Inserted one-way uncond branch: ");
+ DEBUG((*MIB).dump());
+ } else {
+ // Conditional branch
+ MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm()));
+ MIB.addReg(Cond[1].getReg()).addMBB(TBB);
+
+ DEBUG(cerr << "Inserted one-way cond branch: ");
+ DEBUG((*MIB).dump());
+ }
+ return 1;
+ } else {
+ MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm()));
+ MachineInstrBuilder MIB2 = BuildMI(&MBB, dl, get(SPU::BR));
+
+ // Two-way Conditional Branch.
+ MIB.addReg(Cond[1].getReg()).addMBB(TBB);
+ MIB2.addMBB(FBB);
+
+ DEBUG(cerr << "Inserted conditional branch: ");
+ DEBUG((*MIB).dump());
+ DEBUG(cerr << "part 2: ");
+ DEBUG((*MIB2).dump());
+ return 2;
+ }
+}
+
+bool
+SPUInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+ return (!MBB.empty() && isUncondBranch(&MBB.back()));
+}
+//! Reverses a branch's condition, returning false on success.
+bool
+SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // Pretty brainless way of inverting the condition, but it works, considering
+ // there are only two conditions...
+ static struct {
+ unsigned Opc; //! The incoming opcode
+ unsigned RevCondOpc; //! The reversed condition opcode
+ } revconds[] = {
+ { SPU::BRNZr32, SPU::BRZr32 },
+ { SPU::BRNZv4i32, SPU::BRZv4i32 },
+ { SPU::BRZr32, SPU::BRNZr32 },
+ { SPU::BRZv4i32, SPU::BRNZv4i32 },
+ { SPU::BRHNZr16, SPU::BRHZr16 },
+ { SPU::BRHNZv8i16, SPU::BRHZv8i16 },
+ { SPU::BRHZr16, SPU::BRHNZr16 },
+ { SPU::BRHZv8i16, SPU::BRHNZv8i16 }
+ };
+
+ unsigned Opc = unsigned(Cond[0].getImm());
+ // Pretty dull mapping between the two conditions that SPU can generate:
+ for (int i = sizeof(revconds)/sizeof(revconds[0]) - 1; i >= 0; --i) {
+ if (revconds[i].Opc == Opc) {
+ Cond[0].setImm(revconds[i].RevCondOpc);
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
new file mode 100644
index 000000000000..ffb40875ff10
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -0,0 +1,114 @@
+//===- SPUInstrInfo.h - Cell SPU Instruction Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CellSPU implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_INSTRUCTIONINFO_H
+#define SPU_INSTRUCTIONINFO_H
+
+#include "SPU.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "SPURegisterInfo.h"
+
+namespace llvm {
+ //! Cell SPU instruction information class
+ class SPUInstrInfo : public TargetInstrInfoImpl {
+ SPUTargetMachine &TM;
+ const SPURegisterInfo RI;
+ protected:
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ public:
+ explicit SPUInstrInfo(SPUTargetMachine &tm);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ //! Store a register to a stack slot, based on its register class.
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ //! Store a register to an address, based on its register class
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ //! Load a register from a stack slot, based on its register class.
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ //! Load a register from an address, based on its register class
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ //! Return true if the specified load or store can be folded
+ virtual
+ bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+
+ //! Return true if the specified block does not fall through
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+
+ //! Reverses a branch's condition, returning false on success.
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ };
+}
+
+#endif
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
new file mode 100644
index 000000000000..63eb85a2921e
--- /dev/null
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -0,0 +1,4614 @@
+//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instructions:
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TODO Items (not urgent today, but would be nice, low priority)
+//
+// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by
+// concatenating the byte argument b as "bbbb". Could recognize this bit pattern
+// in 16-bit and 32-bit constants and reduce instruction count.
+//===----------------------------------------------------------------------===//
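+//
+// A rough illustration of the idea (editor's sketch, not part of the original
+// patch): the 32-bit constant 0x5A5A5A5A is just the byte 0x5A replicated
+// four times, so an "and" against it could be emitted as a single
+// andbi $rT, $rA, 0x5A instead of materializing the full constant first.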
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions:
+//===----------------------------------------------------------------------===//
+
+let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt),
+ "${:comment} ADJCALLSTACKDOWN",
+ [(callseq_start timm:$amt)]>;
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt),
+ "${:comment} ADJCALLSTACKUP",
+ [(callseq_end timm:$amt)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// DWARF debugging Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file),
+ ".loc $file, $line, $col",
+ [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
+
+//===----------------------------------------------------------------------===//
+// Loads:
+// NB: The ordering is actually important, since the instruction selection
+// will try each of the instructions in sequence, i.e., the D-form first with
+// the 10-bit displacement, then the A-form with the 16 bit displacement, and
+// finally the X-form with the register-register.
+//===----------------------------------------------------------------------===//
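+//
+// A rough sketch of what that ordering means in practice (editorial
+// illustration, not part of the original patch):
+//
+//   lqd $3, 16($1)       ; D-form: signed 10-bit quadword displacement
+//   lqa $3, label        ; A-form: 16-bit absolute local-store address
+//   lqx $3, $4, $5       ; X-form: register + register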
+
+let canFoldAsLoad = 1 in {
+ class LoadDFormVec<ValueType vectype>
+ : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src),
+ "lqd\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load dform_addr:$src))]>
+ { }
+
+ class LoadDForm<RegisterClass rclass>
+ : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src),
+ "lqd\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load dform_addr:$src))]>
+ { }
+
+ multiclass LoadDForms
+ {
+ def v16i8: LoadDFormVec<v16i8>;
+ def v8i16: LoadDFormVec<v8i16>;
+ def v4i32: LoadDFormVec<v4i32>;
+ def v2i64: LoadDFormVec<v2i64>;
+ def v4f32: LoadDFormVec<v4f32>;
+ def v2f64: LoadDFormVec<v2f64>;
+
+ def v2i32: LoadDFormVec<v2i32>;
+
+ def r128: LoadDForm<GPRC>;
+ def r64: LoadDForm<R64C>;
+ def r32: LoadDForm<R32C>;
+ def f32: LoadDForm<R32FP>;
+ def f64: LoadDForm<R64FP>;
+ def r16: LoadDForm<R16C>;
+ def r8: LoadDForm<R8C>;
+ }
+
+ class LoadAFormVec<ValueType vectype>
+ : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src),
+ "lqa\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load aform_addr:$src))]>
+ { }
+
+ class LoadAForm<RegisterClass rclass>
+ : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src),
+ "lqa\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load aform_addr:$src))]>
+ { }
+
+ multiclass LoadAForms
+ {
+ def v16i8: LoadAFormVec<v16i8>;
+ def v8i16: LoadAFormVec<v8i16>;
+ def v4i32: LoadAFormVec<v4i32>;
+ def v2i64: LoadAFormVec<v2i64>;
+ def v4f32: LoadAFormVec<v4f32>;
+ def v2f64: LoadAFormVec<v2f64>;
+
+ def v2i32: LoadAFormVec<v2i32>;
+
+ def r128: LoadAForm<GPRC>;
+ def r64: LoadAForm<R64C>;
+ def r32: LoadAForm<R32C>;
+ def f32: LoadAForm<R32FP>;
+ def f64: LoadAForm<R64FP>;
+ def r16: LoadAForm<R16C>;
+ def r8: LoadAForm<R8C>;
+ }
+
+ class LoadXFormVec<ValueType vectype>
+ : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src),
+ "lqx\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load xform_addr:$src))]>
+ { }
+
+ class LoadXForm<RegisterClass rclass>
+ : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src),
+ "lqx\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load xform_addr:$src))]>
+ { }
+
+ multiclass LoadXForms
+ {
+ def v16i8: LoadXFormVec<v16i8>;
+ def v8i16: LoadXFormVec<v8i16>;
+ def v4i32: LoadXFormVec<v4i32>;
+ def v2i64: LoadXFormVec<v2i64>;
+ def v4f32: LoadXFormVec<v4f32>;
+ def v2f64: LoadXFormVec<v2f64>;
+
+ def v2i32: LoadXFormVec<v2i32>;
+
+ def r128: LoadXForm<GPRC>;
+ def r64: LoadXForm<R64C>;
+ def r32: LoadXForm<R32C>;
+ def f32: LoadXForm<R32FP>;
+ def f64: LoadXForm<R64FP>;
+ def r16: LoadXForm<R16C>;
+ def r8: LoadXForm<R8C>;
+ }
+
+ defm LQA : LoadAForms;
+ defm LQD : LoadDForms;
+ defm LQX : LoadXForms;
+
+/* Load quadword, PC relative: Not much use at this point in time.
+ Might be of use later for relocatable code. It's effectively the
+ same as LQA, but uses PC-relative addressing.
+ def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp),
+ "lqr\t$rT, $disp", LoadStore,
+ [(set VECREG:$rT, (load iaddr:$disp))]>;
+ */
+}
+
+//===----------------------------------------------------------------------===//
+// Stores:
+//===----------------------------------------------------------------------===//
+class StoreDFormVec<ValueType vectype>
+ : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src),
+ "stqd\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), dform_addr:$src)]>
+{ }
+
+class StoreDForm<RegisterClass rclass>
+ : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src),
+ "stqd\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, dform_addr:$src)]>
+{ }
+
+multiclass StoreDForms
+{
+ def v16i8: StoreDFormVec<v16i8>;
+ def v8i16: StoreDFormVec<v8i16>;
+ def v4i32: StoreDFormVec<v4i32>;
+ def v2i64: StoreDFormVec<v2i64>;
+ def v4f32: StoreDFormVec<v4f32>;
+ def v2f64: StoreDFormVec<v2f64>;
+
+ def v2i32: StoreDFormVec<v2i32>;
+
+ def r128: StoreDForm<GPRC>;
+ def r64: StoreDForm<R64C>;
+ def r32: StoreDForm<R32C>;
+ def f32: StoreDForm<R32FP>;
+ def f64: StoreDForm<R64FP>;
+ def r16: StoreDForm<R16C>;
+ def r8: StoreDForm<R8C>;
+}
+
+class StoreAFormVec<ValueType vectype>
+ : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src),
+ "stqa\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), aform_addr:$src)]>;
+
+class StoreAForm<RegisterClass rclass>
+ : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src),
+ "stqa\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, aform_addr:$src)]>;
+
+multiclass StoreAForms
+{
+ def v16i8: StoreAFormVec<v16i8>;
+ def v8i16: StoreAFormVec<v8i16>;
+ def v4i32: StoreAFormVec<v4i32>;
+ def v2i64: StoreAFormVec<v2i64>;
+ def v4f32: StoreAFormVec<v4f32>;
+ def v2f64: StoreAFormVec<v2f64>;
+
+ def v2i32: StoreAFormVec<v2i32>;
+
+ def r128: StoreAForm<GPRC>;
+ def r64: StoreAForm<R64C>;
+ def r32: StoreAForm<R32C>;
+ def f32: StoreAForm<R32FP>;
+ def f64: StoreAForm<R64FP>;
+ def r16: StoreAForm<R16C>;
+ def r8: StoreAForm<R8C>;
+}
+
+class StoreXFormVec<ValueType vectype>
+ : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src),
+ "stqx\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), xform_addr:$src)]>
+{ }
+
+class StoreXForm<RegisterClass rclass>
+ : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src),
+ "stqx\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, xform_addr:$src)]>
+{ }
+
+multiclass StoreXForms
+{
+ def v16i8: StoreXFormVec<v16i8>;
+ def v8i16: StoreXFormVec<v8i16>;
+ def v4i32: StoreXFormVec<v4i32>;
+ def v2i64: StoreXFormVec<v2i64>;
+ def v4f32: StoreXFormVec<v4f32>;
+ def v2f64: StoreXFormVec<v2f64>;
+
+ def v2i32: StoreXFormVec<v2i32>;
+
+ def r128: StoreXForm<GPRC>;
+ def r64: StoreXForm<R64C>;
+ def r32: StoreXForm<R32C>;
+ def f32: StoreXForm<R32FP>;
+ def f64: StoreXForm<R64FP>;
+ def r16: StoreXForm<R16C>;
+ def r8: StoreXForm<R8C>;
+}
+
+defm STQD : StoreDForms;
+defm STQA : StoreAForms;
+defm STQX : StoreXForms;
+
+/* Store quadword, PC relative: Not much use at this point in time. Might
+ be useful for relocatable code.
+def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
+ "stqr\t$rT, $disp", LoadStore,
+ [(store VECREG:$rT, iaddr:$disp)]>;
+*/
+
+//===----------------------------------------------------------------------===//
+// Generate Controls for Insertion:
+//===----------------------------------------------------------------------===//
+
+def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cbd\t$rT, $src", ShuffleOp,
+ [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cbx\t$rT, $src", ShuffleOp,
+ [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "chd\t$rT, $src", ShuffleOp,
+ [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "chx\t$rT, $src", ShuffleOp,
+ [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cwd\t$rT, $src", ShuffleOp,
+ [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cwx\t$rT, $src", ShuffleOp,
+ [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cwd\t$rT, $src", ShuffleOp,
+ [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cwx\t$rT, $src", ShuffleOp,
+ [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cdd\t$rT, $src", ShuffleOp,
+ [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cdx\t$rT, $src", ShuffleOp,
+ [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cdd\t$rT, $src", ShuffleOp,
+ [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cdx\t$rT, $src", ShuffleOp,
+ [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+//===----------------------------------------------------------------------===//
+// Constant formation:
+//===----------------------------------------------------------------------===//
+
+def ILHv8i16:
+ RI16Form<0b110000010, (outs VECREG:$rT), (ins s16imm:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>;
+
+def ILHr16:
+ RI16Form<0b110000010, (outs R16C:$rT), (ins s16imm:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set R16C:$rT, immSExt16:$val)]>;
+
+// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with
+// the right constant")
+def ILHr8:
+ RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set R8C:$rT, immSExt8:$val)]>;
+
+// IL does sign extension!
+
+class ILInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b100000010, OOL, IOL, "il\t$rT, $val",
+ ImmLoad, pattern>;
+
+class ILVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILRegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmediateLoad
+{
+ def v2i64: ILVecInst<v2i64, s16imm_i64, v2i64SExt16Imm>;
+ def v4i32: ILVecInst<v4i32, s16imm_i32, v4i32SExt16Imm>;
+
+ // TODO: Need v2f64, v4f32
+
+ def r64: ILRegInst<R64C, s16imm_i64, immSExt16>;
+ def r32: ILRegInst<R32C, s16imm_i32, immSExt16>;
+ def f32: ILRegInst<R32FP, s16imm_f32, fpimmSExt16>;
+ def f64: ILRegInst<R64FP, s16imm_f64, fpimmSExt16>;
+}
+
+defm IL : ImmediateLoad;
+
+class ILHUInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b010000010, OOL, IOL, "ilhu\t$rT, $val",
+ ImmLoad, pattern>;
+
+class ILHUVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILHUInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILHURegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILHUInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmLoadHalfwordUpper
+{
+ def v2i64: ILHUVecInst<v2i64, u16imm_i64, immILHUvec_i64>;
+ def v4i32: ILHUVecInst<v4i32, u16imm_i32, immILHUvec>;
+
+ def r64: ILHURegInst<R64C, u16imm_i64, hi16>;
+ def r32: ILHURegInst<R32C, u16imm_i32, hi16>;
+
+ // Loads the high portion of an address
+ def hi: ILHURegInst<R32C, symbolHi, hi16>;
+
+ // Used in custom lowering constant SFP loads:
+ def f32: ILHURegInst<R32FP, f16imm, hi16_f32>;
+}
+
+defm ILHU : ImmLoadHalfwordUpper;
+
+// Immediate load address (can also be used to load 18-bit unsigned constants,
+// see the zext 16->32 pattern)
+
+class ILAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val",
+ LoadNOP, pattern>;
+
+class ILAVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILAInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILARegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILAInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmLoadAddress
+{
+ def v2i64: ILAVecInst<v2i64, u18imm, v2i64Uns18Imm>;
+ def v4i32: ILAVecInst<v4i32, u18imm, v4i32Uns18Imm>;
+
+ def r64: ILARegInst<R64C, u18imm_i64, imm18>;
+ def r32: ILARegInst<R32C, u18imm, imm18>;
+ def f32: ILARegInst<R32FP, f18imm, fpimm18>;
+ def f64: ILARegInst<R64FP, f18imm_f64, fpimm18>;
+
+ def hi: ILARegInst<R32C, symbolHi, imm18>;
+ def lo: ILARegInst<R32C, symbolLo, imm18>;
+
+ def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
+ [/* no pattern */]>;
+}
+
+defm ILA : ImmLoadAddress;
+
+// Immediate OR, Halfword Lower: The "other" part of loading large constants
+// into 32-bit registers. See the anonymous pattern Pat<(i32 imm:$imm), ...>
+// Note that these are really two operand instructions, but they're encoded
+// as three operands with the first two arguments tied to each other.
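+//
+// A hedged illustration of the intended pairing (editor's sketch, not part
+// of the original patch): loading the 32-bit constant 0x12345678 into $3 is
+// expected to expand to
+//
+//   ilhu $3, 0x1234      ; upper halfword = 0x1234, lower halfword cleared
+//   iohl $3, 0x5678      ; OR 0x5678 into the lower halfword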
+
+class IOHLInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b100000110, OOL, IOL, "iohl\t$rT, $val",
+ ImmLoad, pattern>,
+ RegConstraint<"$rS = $rT">,
+ NoEncode<"$rS">;
+
+class IOHLVecInst<ValueType vectype, Operand immtype /* , PatLeaf xform */>:
+ IOHLInst<(outs VECREG:$rT), (ins VECREG:$rS, immtype:$val),
+ [/* no pattern */]>;
+
+class IOHLRegInst<RegisterClass rclass, Operand immtype /* , PatLeaf xform */>:
+ IOHLInst<(outs rclass:$rT), (ins rclass:$rS, immtype:$val),
+ [/* no pattern */]>;
+
+multiclass ImmOrHalfwordLower
+{
+ def v2i64: IOHLVecInst<v2i64, u16imm_i64>;
+ def v4i32: IOHLVecInst<v4i32, u16imm_i32>;
+
+ def r32: IOHLRegInst<R32C, i32imm>;
+ def f32: IOHLRegInst<R32FP, f32imm>;
+
+ def lo: IOHLRegInst<R32C, symbolLo>;
+}
+
+defm IOHL: ImmOrHalfwordLower;
+
+// Form select mask for bytes using immediate, used in conjunction with the
+// SELB instruction:
+
+class FSMBIVec<ValueType vectype>:
+ RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val),
+ "fsmbi\t$rT, $val",
+ SelectOp,
+ [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>;
+
+multiclass FormSelectMaskBytesImm
+{
+ def v16i8: FSMBIVec<v16i8>;
+ def v8i16: FSMBIVec<v8i16>;
+ def v4i32: FSMBIVec<v4i32>;
+ def v2i64: FSMBIVec<v2i64>;
+}
+
+defm FSMBI : FormSelectMaskBytesImm;
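+
+// A small usage sketch (editorial illustration, assuming the SELB instruction
+// defined elsewhere in this target): bit i of the immediate expands to byte i
+// of the mask, which SELB uses to pick bytes from $rB (mask byte = 0xff) or
+// $rA (mask byte = 0x00):
+//
+//   fsmbi $5, 0x00ff       ; bytes 0-7 = 0x00, bytes 8-15 = 0xff
+//   selb  $6, $3, $4, $5   ; bytes 0-7 from $3, bytes 8-15 from $4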
+
+// fsmb: Form select mask for bytes. N.B.: the input operand $rA is 16 bits wide
+class FSMBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMBRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMBVecInst<ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskBits {
+ def v16i8_r16: FSMBRegInst<R16C, v16i8>;
+ def v16i8: FSMBVecInst<v16i8>;
+}
+
+defm FSMB: FormSelectMaskBits;
+
+// fsmh: Form select mask for halfwords. N.B.: the input operand $rA is
+// only 8 bits wide (even though it's input as 16 bits here)
+
+class FSMHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMHRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMHVecInst<ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskHalfword {
+ def v8i16_r16: FSMHRegInst<R16C, v8i16>;
+ def v8i16: FSMHVecInst<v8i16>;
+}
+
+defm FSMH: FormSelectMaskHalfword;
+
+// fsm: Form select mask for words. Like the other fsm* instructions,
+// only the lower 4 bits of $rA are significant.
+
+class FSMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMRegInst<ValueType vectype, RegisterClass rclass>:
+ FSMInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMVecInst<ValueType vectype>:
+ FSMInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskWord {
+ def v4i32: FSMVecInst<v4i32>;
+
+ def r32 : FSMRegInst<v4i32, R32C>;
+ def r16 : FSMRegInst<v4i32, R16C>;
+}
+
+defm FSM : FormSelectMaskWord;
+
+// Special case when used for i64 math operations
+multiclass FormSelectMaskWord64 {
+ def r32 : FSMRegInst<v2i64, R32C>;
+ def r16 : FSMRegInst<v2i64, R16C>;
+}
+
+defm FSM64 : FormSelectMaskWord64;
+
+//===----------------------------------------------------------------------===//
+// Integer and Logical Operations:
+//===----------------------------------------------------------------------===//
+
+def AHv8i16:
+ RRForm<0b00010011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ah\t$rT, $rA, $rB", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>;
+
+def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (AHv8i16 VECREG:$rA, VECREG:$rB)>;
+
+def AHr16:
+ RRForm<0b00010011000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "ah\t$rT, $rA, $rB", IntegerOp,
+ [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>;
+
+def AHIvec:
+ RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ahi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA),
+ v8i16SExt10Imm:$val))]>;
+
+def AHIr16:
+ RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "ahi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>;
+
+// v4i32, i32 add instruction:
+
+class AInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000011000, OOL, IOL,
+ "a\t$rT, $rA, $rB", IntegerOp,
+ pattern>;
+
+class AVecInst<ValueType vectype>:
+ AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ARegInst<RegisterClass rclass>:
+ AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>;
+
+multiclass AddInstruction {
+ def v4i32: AVecInst<v4i32>;
+ def v16i8: AVecInst<v16i8>;
+
+ def r32: ARegInst<R32C>;
+}
+
+defm A : AddInstruction;
+
+class AIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00111000, OOL, IOL,
+ "ai\t$rT, $rA, $val", IntegerOp,
+ pattern>;
+
+class AIVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>;
+
+class AIFPVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [/* no pattern */]>;
+
+class AIRegInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>;
+
+// This is used to add epsilons to floating point numbers in the f32 fdiv code:
+class AIFPInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
+
+multiclass AddImmediate {
+ def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>;
+
+ def r32: AIRegInst<R32C, i32ImmSExt10>;
+
+ def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>;
+ def f32: AIFPInst<R32FP, i32ImmSExt10>;
+}
+
+defm AI : AddImmediate;
+
+def SFHvec:
+ RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "sfh\t$rT, $rA, $rB", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def SFHr16:
+ RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "sfh\t$rT, $rA, $rB", IntegerOp,
+ [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>;
+
+def SFHIvec:
+ RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "sfhi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (sub v8i16SExt10Imm:$val,
+ (v8i16 VECREG:$rA)))]>;
+
+def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "sfhi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>;
+
+def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set R32C:$rT, (sub R32C:$rA, R32C:$rB))]>;
+
+def SFIvec:
+ RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "sfi\t$rT, $rA, $val", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (sub v4i32SExt10Imm:$val,
+ (v4i32 VECREG:$rA)))]>;
+
+def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT),
+ (ins R32C:$rA, s10imm_i32:$val),
+ "sfi\t$rT, $rA, $val", IntegerOp,
+ [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>;
+
+// ADDX: only available in vector form, doesn't match a pattern.
+class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000010110, OOL, IOL,
+ "addx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ADDXVecInst<ValueType vectype>:
+ ADDXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+class ADDXRegInst<RegisterClass rclass>:
+ ADDXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+multiclass AddExtended {
+ def v2i64 : ADDXVecInst<v2i64>;
+ def v4i32 : ADDXVecInst<v4i32>;
+ def r64 : ADDXRegInst<R64C>;
+ def r32 : ADDXRegInst<R32C>;
+}
+
+defm ADDX : AddExtended;
+
+// CG: Generate carry for add
+class CGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000011000, OOL, IOL,
+ "cg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class CGVecInst<ValueType vectype>:
+ CGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+class CGRegInst<RegisterClass rclass>:
+ CGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [/* no pattern */]>;
+
+multiclass CarryGenerate {
+ def v2i64 : CGVecInst<v2i64>;
+ def v4i32 : CGVecInst<v4i32>;
+ def r64 : CGRegInst<R64C>;
+ def r32 : CGRegInst<R32C>;
+}
+
+defm CG : CarryGenerate;
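+
+// A hedged sketch (editor's illustration, not part of the original patch) of
+// how CG and ADDX combine for a multi-precision add of {hi,lo} word pairs:
+//
+//   cg   $7, $3, $5        ; $7 = carry out of the low words $3 + $5
+//   a    $8, $3, $5        ; $8 = low-word sum
+//   addx $7, $4, $6        ; $7 = high words $4 + $6 + carry (the carry is
+//                          ;      read from, and the sum written to, $7)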
+
+// SFX: Subtract from, extended. This is used in conjunction with BG to
+// subtract with borrow.
+class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000010110, OOL, IOL,
+ "sfx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class SFXVecInst<ValueType vectype>:
+ SFXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+class SFXRegInst<RegisterClass rclass>:
+ SFXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+multiclass SubtractExtended {
+ def v2i64 : SFXVecInst<v2i64>;
+ def v4i32 : SFXVecInst<v4i32>;
+ def r64 : SFXRegInst<R64C>;
+ def r32 : SFXRegInst<R32C>;
+}
+
+defm SFX : SubtractExtended;
+
+// BG: only available in vector form, doesn't match a pattern.
+class BGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000010000, OOL, IOL,
+ "bg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class BGVecInst<ValueType vectype>:
+ BGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+class BGRegInst<RegisterClass rclass>:
+ BGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [/* no pattern */]>;
+
+multiclass BorrowGenerate {
+ def v4i32 : BGVecInst<v4i32>;
+ def v2i64 : BGVecInst<v2i64>;
+ def r64 : BGRegInst<R64C>;
+ def r32 : BGRegInst<R32C>;
+}
+
+defm BG : BorrowGenerate;
+
+// BGX: Borrow generate, extended.
+def BGXvec:
+ RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
+ VECREG:$rCarry),
+ "bgx\t$rT, $rA, $rB", IntegerOp,
+ []>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+// Halfword multiply variants:
+// N.B: These can be used to build up larger quantities (16x16 -> 32)
+
+def MPYv8i16:
+ RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpy\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYr16:
+ RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "mpy\t$rT, $rA, $rB", IntegerMulDiv,
+ [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>;
+
+// Unsigned 16-bit multiply:
+
+class MPYUInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00110011110, OOL, IOL,
+ "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
+def MPYUv4i32:
+ MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+def MPYUr16:
+ MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;
+
+def MPYUr32:
+ MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+// mpyi: multiply 16 x s10imm -> 32 result.
+
+class MPYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00101110, OOL, IOL,
+ "mpyi\t$rT, $rA, $val", IntegerMulDiv,
+ pattern>;
+
+def MPYIvec:
+ MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
+
+def MPYIr16:
+ MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;
+
+// mpyui: same issues as the other multiplies; additionally, it doesn't match
+// a pattern, but may be used during target DAG selection or lowering
+
+class MPYUIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b10101110, OOL, IOL,
+ "mpyui\t$rT, $rA, $val", IntegerMulDiv,
+ pattern>;
+
+def MPYUIvec:
+ MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ []>;
+
+def MPYUIr16:
+ MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ []>;
+
+// mpya: 16 x 16 + 16 -> 32 bit result
+class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b0011, OOL, IOL,
+ "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
+ pattern>;
+
+def MPYAv4i32:
+ MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4i32 VECREG:$rT),
+ (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))),
+ (v4i32 VECREG:$rC)))]>;
+
+def MPYAr32:
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
+ R32C:$rC))]>;
+
+def MPYAr32_sext:
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)),
+ R32C:$rC))]>;
+
+def MPYAr32_sextinreg:
+ MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16),
+ (sext_inreg R32C:$rB, i16)),
+ R32C:$rC))]>;
+
+// mpyh: multiply high, used to synthesize 32-bit multiplies
+class MPYHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10100011110, OOL, IOL,
+ "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
+def MPYHv4i32:
+ MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+def MPYHr32:
+ MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* no pattern */]>;
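+
+// One common 32 x 32 -> low-32-bit multiply expansion that mpyh supports
+// (editor's sketch, not part of the original patch):
+//
+//   mpyh $7, $3, $4       ; (hi16($3) * lo16($4)) << 16
+//   mpyh $8, $4, $3       ; (hi16($4) * lo16($3)) << 16
+//   mpyu $9, $3, $4       ; lo16($3) * lo16($4), unsigned
+//   a    $7, $7, $8
+//   a    $2, $7, $9       ; low 32 bits of $3 * $4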
+
+// mpys: multiply high and shift right (returns the upper 16 bits of a
+// 16 x 16-bit multiply, sign-extended to 32 bits.)
+
+class MPYSInst<dag OOL, dag IOL>:
+ RRForm<0b11100011110, OOL, IOL,
+ "mpys\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYSv4i32:
+ MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYSr16:
+ MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>;
+
+// mpyhh: multiply high-high (returns the 32-bit result of multiplying
+// the upper 16 bits of $rA and $rB)
+
+class MPYHHInst<dag OOL, dag IOL>:
+ RRForm<0b01100011110, OOL, IOL,
+ "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHv8i16:
+ MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHr32:
+ MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhha: Multiply high-high, add to $rT:
+
+class MPYHHAInst<dag OOL, dag IOL>:
+ RRForm<0b01100010110, OOL, IOL,
+ "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHAvec:
+ MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHAr32:
+ MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhhu: Multiply high-high, unsigned, e.g.:
+//
+// +-------+-------+ +-------+-------+ +---------+
+// | a0 . a1 | x | b0 . b1 | = | a0 x b0 |
+// +-------+-------+ +-------+-------+ +---------+
+//
+// where a0, b0 are the upper 16 bits of the 32-bit word
+
+class MPYHHUInst<dag OOL, dag IOL>:
+ RRForm<0b01110011110, OOL, IOL,
+ "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHUv4i32:
+ MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHUr32:
+ MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhhau: Multiply high-high, unsigned
+
+class MPYHHAUInst<dag OOL, dag IOL>:
+ RRForm<0b01110010110, OOL, IOL,
+ "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHAUvec:
+ MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHAUr32:
+ MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// clz: Count leading zeroes
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
+ IntegerOp, pattern>;
+
+class CLZRegInst<RegisterClass rclass>:
+ CLZInst<(outs rclass:$rT), (ins rclass:$rA),
+ [(set rclass:$rT, (ctlz rclass:$rA))]>;
+
+class CLZVecInst<ValueType vectype>:
+ CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>;
+
+multiclass CountLeadingZeroes {
+ def v4i32 : CLZVecInst<v4i32>;
+ def r32 : CLZRegInst<R32C>;
+}
+
+defm CLZ : CountLeadingZeroes;
+
+// cntb: Count ones in bytes (aka "population count")
+//
+// NOTE: This instruction is really a vector instruction, but the custom
+// lowering code uses it in unorthodox ways to support CTPOP for other
+// data types!
+
+def CNTBv16i8:
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>;
+
+def CNTBv8i16 :
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>;
+
+def CNTBv4i32 :
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
+
+// gbb: Gather the low order bits from each byte in $rA into a single 16-bit
+// quantity stored into $rT's slot 0, upper 16 bits are zeroed, as are
+// slots 1-3.
+//
+// Note: This instruction "pairs" with the fsmb instruction for all of the
+// various types defined here.
+//
+// Note 2: The "VecInst" and "RegInst" forms refer to the result being either
+// a vector or register.
+
+class GBBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>;
+
+class GBBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBBVecInst<ValueType vectype>:
+ GBBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsFromBytes {
+ def v16i8_r32: GBBRegInst<R32C, v16i8>;
+ def v16i8_r16: GBBRegInst<R16C, v16i8>;
+ def v16i8: GBBVecInst<v16i8>;
+}
+
+defm GBB: GatherBitsFromBytes;
+
+// gbh: Gather all low order bits from each halfword in $rA into a single
+// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0
+// and slots 1-3 also set to 0.
+//
+// See notes for GBBInst, above.
+
+class GBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBHRegInst<RegisterClass rclass, ValueType vectype>:
+ GBHInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBHVecInst<ValueType vectype>:
+ GBHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsHalfword {
+ def v8i16_r32: GBHRegInst<R32C, v8i16>;
+ def v8i16_r16: GBHRegInst<R16C, v8i16>;
+ def v8i16: GBHVecInst<v8i16>;
+}
+
+defm GBH: GatherBitsHalfword;
+
+// gb: Gather all low order bits from each word in $rA into a single
+// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0,
+// as well as slots 1-3.
+//
+// See notes for gbb, above.
+
+class GBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBVecInst<ValueType vectype>:
+ GBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsWord {
+ def v4i32_r32: GBRegInst<R32C, v4i32>;
+ def v4i32_r16: GBRegInst<R16C, v4i32>;
+ def v4i32: GBVecInst<v4i32>;
+}
+
+defm GB: GatherBitsWord;
+
+// avgb: average bytes
+def AVGB:
+ RRForm<0b11001011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "avgb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// absdb: absolute difference of bytes
+def ABSDB:
+ RRForm<0b11001010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "absdb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// sumb: sum bytes into halfwords
+def SUMB:
+ RRForm<0b11001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "sumb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// Sign extension operations:
+class XSBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL,
+ "xsbh\t$rDst, $rSrc",
+ IntegerOp, pattern>;
+
+class XSBHVecInst<ValueType vectype>:
+ XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+ [(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>;
+
+class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>:
+ XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
+ pattern>;
+
+multiclass ExtendByteHalfword {
+ def v16i8: XSBHVecInst<v8i16>;
+ def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
+ [(set R16C:$rDst, (sext R8C:$rSrc))]>;
+ def r16: XSBHInRegInst<R16C,
+ [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
+
+ // 32-bit form for XSBH: used when sign-extending 8-bit quantities through
+ // 16 bits up to 32 bits in a 32-bit register (see the sext 8->32 pattern
+ // below). Intentionally doesn't match a pattern, because we want the
+ // sext 8->32 pattern to do the work for us; it needs the extra XSHWr32.
+ def r32: XSBHInRegInst<R32C, [/* no pattern */]>;
+
+ // Same as the 32-bit version, but for i64
+ def r64: XSBHInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSBH : ExtendByteHalfword;
+
+// Sign extend halfwords to words:
+
+class XSHWInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc",
+ IntegerOp, pattern>;
+
+class XSHWVecInst<ValueType in_vectype, ValueType out_vectype>:
+ XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc),
+ [(set (out_vectype VECREG:$rDest),
+ (sext (in_vectype VECREG:$rSrc)))]>;
+
+class XSHWInRegInst<RegisterClass rclass, list<dag> pattern>:
+ XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc),
+ pattern>;
+
+class XSHWRegInst<RegisterClass rclass>:
+ XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc),
+ [(set rclass:$rDest, (sext R16C:$rSrc))]>;
+
+multiclass ExtendHalfwordWord {
+ def v4i32: XSHWVecInst<v4i32, v8i16>;
+
+ def r16: XSHWRegInst<R32C>;
+
+ def r32: XSHWInRegInst<R32C,
+ [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
+ def r64: XSHWInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSHW : ExtendHalfwordWord;
+
+// Sign-extend words to doublewords (32->64 bits)
+
+class XSWDInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc",
+ IntegerOp, pattern>;
+
+class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>:
+ XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+ [(set (out_vectype VECREG:$rDst),
+ (sext (in_vectype VECREG:$rSrc)))]>;
+
+class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>:
+ XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc),
+ [(set out_rclass:$rDst, (sext in_rclass:$rSrc))]>;
+
+multiclass ExtendWordToDoubleWord {
+ def v2i64: XSWDVecInst<v4i32, v2i64>;
+ def r64: XSWDRegInst<R32C, R64C>;
+
+ def r64_inreg: XSWDInst<(outs R64C:$rDst), (ins R64C:$rSrc),
+ [(set R64C:$rDst, (sext_inreg R64C:$rSrc, i32))]>;
+}
+
+defm XSWD : ExtendWordToDoubleWord;
+
+// AND operations
+
+class ANDInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b10000011000, OOL, IOL, "and\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ANDVecInst<ValueType vectype>:
+ ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ANDRegInst<RegisterClass rclass>:
+ ANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (and rclass:$rA, rclass:$rB))]>;
+
+multiclass BitwiseAnd
+{
+ def v16i8: ANDVecInst<v16i8>;
+ def v8i16: ANDVecInst<v8i16>;
+ def v4i32: ANDVecInst<v4i32>;
+ def v2i64: ANDVecInst<v2i64>;
+
+ def r128: ANDRegInst<GPRC>;
+ def r64: ANDRegInst<R64C>;
+ def r32: ANDRegInst<R32C>;
+ def r16: ANDRegInst<R16C>;
+ def r8: ANDRegInst<R8C>;
+
+ //===---------------------------------------------
+ // Special instructions to perform the fabs instruction
+ def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ //===---------------------------------------------
+
+ // Hacked form of AND to zero-extend 16-bit quantities to 32-bit
+ // quantities -- see 16->32 zext pattern.
+ //
+ // This pattern is somewhat artificial, since it might match some
+ // compiler generated pattern but it is unlikely to do so.
+
+ def i16i32: ANDInst<(outs R32C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R32C:$rT, (and (zext R16C:$rA), R32C:$rB))]>;
+}
+
+defm AND : BitwiseAnd;
+
+// N.B.: vnot_conv is one of those special target selection pattern fragments,
+// in which we expect there to be a bit_convert on the constant. Bear in mind
+// that llvm translates "not <reg>" to "xor <reg>, -1" (or in this case, a
+// constant -1 vector.)
+
+class ANDCInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ANDCVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
+ ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (and (vectype VECREG:$rA),
+ (vnot_frag (vectype VECREG:$rB))))]>;
+
+class ANDCRegInst<RegisterClass rclass>:
+ ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (and rclass:$rA, (not rclass:$rB)))]>;
+
+multiclass AndComplement
+{
+ def v16i8: ANDCVecInst<v16i8>;
+ def v8i16: ANDCVecInst<v8i16>;
+ def v4i32: ANDCVecInst<v4i32>;
+ def v2i64: ANDCVecInst<v2i64>;
+
+ def r128: ANDCRegInst<GPRC>;
+ def r64: ANDCRegInst<R64C>;
+ def r32: ANDCRegInst<R32C>;
+ def r16: ANDCRegInst<R16C>;
+ def r8: ANDCRegInst<R8C>;
+
+ // Sometimes, the xor pattern has a bitcast constant:
+ def v16i8_conv: ANDCVecInst<v16i8, vnot_conv>;
+}
+
+defm ANDC : AndComplement;
+
+class ANDBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01101000, OOL, IOL, "andbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass AndByteImm
+{
+ def v16i8: ANDBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT),
+ (and (v16i8 VECREG:$rA),
+ (v16i8 v16i8U8Imm:$val)))]>;
+
+ def r8: ANDBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>;
+}
+
+defm ANDBI : AndByteImm;
+
+class ANDHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10101000, OOL, IOL, "andhi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass AndHalfwordImm
+{
+ def v8i16: ANDHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (and (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
+
+ def r16: ANDHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
+ [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>;
+
+ // Zero-extend i8 to i16:
+ def i8i16: ANDHIInst<(outs R16C:$rT), (ins R8C:$rA, u10imm:$val),
+ [(set R16C:$rT, (and (zext R8C:$rA), i16ImmUns10:$val))]>;
+}
+
+defm ANDHI : AndHalfwordImm;
+
+class ANDIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00101000, OOL, IOL, "andi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+multiclass AndWordImm
+{
+ def v4i32: ANDIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (and (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>;
+
+ def r32: ANDIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32
+ // pattern below.
+ def i8i32: ANDIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT,
+ (and (zext R8C:$rA), i32ImmSExt10:$val))]>;
+
+ // Hacked form of ANDI to zero-extend i16 quantities to i32. See the
+ // zext 16->32 pattern below.
+ //
+ // Note that this pattern is somewhat artificial, since it might match
+ // something the compiler generates but is unlikely to occur in practice.
+ def i16i32: ANDIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT,
+ (and (zext R16C:$rA), i32ImmSExt10:$val))]>;
+}
+
+defm ANDI : AndWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Bitwise OR group:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Bitwise "or" (N.B.: These are also register-register copy instructions...)
+class ORInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000010000, OOL, IOL, "or\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ORVecInst<ValueType vectype>:
+ ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ORRegInst<RegisterClass rclass>:
+ ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
+
+// ORCvtForm: OR conversion form
+//
+// This is used to "convert" the preferred slot to its vector equivalent, as
+// well as convert a vector back to its preferred slot.
+//
+// These are effectively no-ops, but need to exist for proper type conversion
+// and type coercion.
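+//
+// Concretely (editorial note), every ORCvtForm variant assembles to the same
+// self-OR of the source register:
+//
+//   or $rT, $rA, $rA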
+
+class ORCvtForm<dag OOL, dag IOL, list<dag> pattern = [/* no pattern */]>
+ : SPUInstr<OOL, IOL, "or\t$rT, $rA, $rA", IntegerOp> {
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = 0b10000010000;
+ let Inst{11-17} = RA;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+class ORPromoteScalar<RegisterClass rclass>:
+ ORCvtForm<(outs VECREG:$rT), (ins rclass:$rA)>;
+
+class ORExtractElt<RegisterClass rclass>:
+ ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>;
+
+/* class ORCvtRegGPRC<RegisterClass rclass>:
+ ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; */
+
+/* class ORCvtGPRCReg<RegisterClass rclass>:
+ ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; */
+
+class ORCvtFormR32Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
+ ORCvtForm<(outs rclass:$rT), (ins R32C:$rA), pattern>;
+
+class ORCvtFormRegR32<RegisterClass rclass, list<dag> pattern = [ ]>:
+ ORCvtForm<(outs R32C:$rT), (ins rclass:$rA), pattern>;
+
+class ORCvtFormR64Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
+ ORCvtForm<(outs rclass:$rT), (ins R64C:$rA), pattern>;
+
+class ORCvtFormRegR64<RegisterClass rclass, list<dag> pattern = [ ]>:
+ ORCvtForm<(outs R64C:$rT), (ins rclass:$rA), pattern>;
+
+class ORCvtGPRCVec:
+ ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>;
+
+class ORCvtVecGPRC:
+ ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
+
+multiclass BitwiseOr
+{
+ def v16i8: ORVecInst<v16i8>;
+ def v8i16: ORVecInst<v8i16>;
+ def v4i32: ORVecInst<v4i32>;
+ def v2i64: ORVecInst<v2i64>;
+
+ def v4f32: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4f32 VECREG:$rT),
+ (v4f32 (bitconvert (or (v4i32 VECREG:$rA),
+ (v4i32 VECREG:$rB)))))]>;
+
+ def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v2f64 VECREG:$rT),
+ (v2f64 (bitconvert (or (v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)))))]>;
+
+ def r128: ORRegInst<GPRC>;
+ def r64: ORRegInst<R64C>;
+ def r32: ORRegInst<R32C>;
+ def r16: ORRegInst<R16C>;
+ def r8: ORRegInst<R8C>;
+
+ // OR instructions used to copy f32 and f64 registers.
+ def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [/* no pattern */]>;
+
+ def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ [/* no pattern */]>;
+
+ // scalar->vector promotion, prefslot2vec:
+ def v16i8_i8: ORPromoteScalar<R8C>;
+ def v8i16_i16: ORPromoteScalar<R16C>;
+ def v4i32_i32: ORPromoteScalar<R32C>;
+ def v2i64_i64: ORPromoteScalar<R64C>;
+ def v4f32_f32: ORPromoteScalar<R32FP>;
+ def v2f64_f64: ORPromoteScalar<R64FP>;
+
+ // vector->scalar demotion, vec2prefslot:
+ def i8_v16i8: ORExtractElt<R8C>;
+ def i16_v8i16: ORExtractElt<R16C>;
+ def i32_v4i32: ORExtractElt<R32C>;
+ def i64_v2i64: ORExtractElt<R64C>;
+ def f32_v4f32: ORExtractElt<R32FP>;
+ def f64_v2f64: ORExtractElt<R64FP>;
+
+ // Conversion from vector to GPRC
+ def i128_vec: ORCvtVecGPRC;
+
+ // Conversion from GPRC to vector
+ def vec_i128: ORCvtGPRCVec;
+
+/*
+ // Conversion from register to GPRC
+ def i128_r64: ORCvtRegGPRC<R64C>;
+ def i128_f64: ORCvtRegGPRC<R64FP>;
+ def i128_r32: ORCvtRegGPRC<R32C>;
+ def i128_f32: ORCvtRegGPRC<R32FP>;
+ def i128_r16: ORCvtRegGPRC<R16C>;
+ def i128_r8: ORCvtRegGPRC<R8C>;
+
+ // Conversion from GPRC to register
+ def r64_i128: ORCvtGPRCReg<R64C>;
+ def f64_i128: ORCvtGPRCReg<R64FP>;
+ def r32_i128: ORCvtGPRCReg<R32C>;
+ def f32_i128: ORCvtGPRCReg<R32FP>;
+ def r16_i128: ORCvtGPRCReg<R16C>;
+ def r8_i128: ORCvtGPRCReg<R8C>;
+*/
+/*
+ // Conversion from register to R32C:
+ def r32_r16: ORCvtFormRegR32<R16C>;
+ def r32_r8: ORCvtFormRegR32<R8C>;
+
+ // Conversion from R32C to register
+ def r32_r16: ORCvtFormR32Reg<R16C>;
+ def r32_r8: ORCvtFormR32Reg<R8C>;
+*/
+
+ // Conversion from R64C to register:
+ def r32_r64: ORCvtFormR64Reg<R32C>;
+ // def r16_r64: ORCvtFormR64Reg<R16C>;
+ // def r8_r64: ORCvtFormR64Reg<R8C>;
+
+ // Conversion to R64C from register:
+ def r64_r32: ORCvtFormRegR64<R32C>;
+ // def r64_r16: ORCvtFormRegR64<R16C>;
+ // def r64_r8: ORCvtFormRegR64<R8C>;
+
+ // bitconvert patterns:
+ def r32_f32: ORCvtFormR32Reg<R32FP,
+ [(set R32FP:$rT, (bitconvert R32C:$rA))]>;
+ def f32_r32: ORCvtFormRegR32<R32FP,
+ [(set R32C:$rT, (bitconvert R32FP:$rA))]>;
+
+ def r64_f64: ORCvtFormR64Reg<R64FP,
+ [(set R64FP:$rT, (bitconvert R64C:$rA))]>;
+ def f64_r64: ORCvtFormRegR64<R64FP,
+ [(set R64C:$rT, (bitconvert R64FP:$rA))]>;
+}
+
+defm OR : BitwiseOr;
+
+// scalar->vector promotion patterns (preferred slot to vector):
+def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
+ (ORv16i8_i8 R8C:$rA)>;
+
+def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
+ (ORv8i16_i16 R16C:$rA)>;
+
+def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
+ (ORv4i32_i32 R32C:$rA)>;
+
+def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
+ (ORv2i64_i64 R64C:$rA)>;
+
+def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
+ (ORv4f32_f32 R32FP:$rA)>;
+
+def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
+ (ORv2f64_f64 R64FP:$rA)>;
+
+// ORi*_v*: Used to extract vector element 0 (the preferred slot), otherwise
+// known as converting the vector back to its preferred slot
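+// (Illustrative note: the "preferred slot" is the fixed slot of the 128-bit
+// register in which the SPU keeps a scalar of that type -- word element 0 for
+// 32-bit values -- so these ORs are effectively register copies between the
+// scalar and vector views of the same physical register.)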
+
+def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)),
+ (ORi8_v16i8 VECREG:$rA)>;
+
+def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
+ (ORi16_v8i16 VECREG:$rA)>;
+
+def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
+ (ORi32_v4i32 VECREG:$rA)>;
+
+def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
+ (ORi64_v2i64 VECREG:$rA)>;
+
+def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)),
+ (ORf32_v4f32 VECREG:$rA)>;
+
+def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
+ (ORf64_v2f64 VECREG:$rA)>;
+
+// Load Register: This is an assembler alias for a bitwise OR of a register
+// against itself. It's here because it brings some clarity to assembly
+// language output.
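+// For example, "lr $3, $4" is expected to encode identically to "or $3, $4, $4";
+// note how the encoding below places RA in both source register fields.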
+
+let hasCtrlDep = 1 in {
+ class LRInst<dag OOL, dag IOL>
+ : SPUInstr<OOL, IOL, "lr\t$rT, $rA", IntegerOp> {
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = [/*no pattern*/];
+
+ let Inst{0-10} = 0b10000010000; /* It's an OR operation */
+ let Inst{11-17} = RA;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+ }
+
+ class LRVecInst<ValueType vectype>:
+ LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+ class LRRegInst<RegisterClass rclass>:
+ LRInst<(outs rclass:$rT), (ins rclass:$rA)>;
+
+ multiclass LoadRegister {
+ def v2i64: LRVecInst<v2i64>;
+ def v2f64: LRVecInst<v2f64>;
+ def v4i32: LRVecInst<v4i32>;
+ def v4f32: LRVecInst<v4f32>;
+ def v8i16: LRVecInst<v8i16>;
+ def v16i8: LRVecInst<v16i8>;
+
+ def r128: LRRegInst<GPRC>;
+ def r64: LRRegInst<R64C>;
+ def f64: LRRegInst<R64FP>;
+ def r32: LRRegInst<R32C>;
+ def f32: LRRegInst<R32FP>;
+ def r16: LRRegInst<R16C>;
+ def r8: LRRegInst<R8C>;
+ }
+
+ defm LR: LoadRegister;
+}
+
+// ORC: Bitwise "or" with complement (c = a | ~b)
+
+class ORCInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "orc\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ORCVecInst<ValueType vectype>:
+ ORCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ (vnot (vectype VECREG:$rB))))]>;
+
+class ORCRegInst<RegisterClass rclass>:
+ ORCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or rclass:$rA, (not rclass:$rB)))]>;
+
+multiclass BitwiseOrComplement
+{
+ def v16i8: ORCVecInst<v16i8>;
+ def v8i16: ORCVecInst<v8i16>;
+ def v4i32: ORCVecInst<v4i32>;
+ def v2i64: ORCVecInst<v2i64>;
+
+ def r128: ORCRegInst<GPRC>;
+ def r64: ORCRegInst<R64C>;
+ def r32: ORCRegInst<R32C>;
+ def r16: ORCRegInst<R16C>;
+ def r8: ORCRegInst<R8C>;
+}
+
+defm ORC : BitwiseOrComplement;
+
+// OR byte immediate
+class ORBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01100000, OOL, IOL, "orbi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORBIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT), (or (vectype VECREG:$rA),
+ (vectype immpred:$val)))]>;
+
+multiclass BitwiseOrByteImm
+{
+ def v16i8: ORBIVecInst<v16i8, v16i8U8Imm>;
+
+ def r8: ORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>;
+}
+
+defm ORBI : BitwiseOrByteImm;
+
+// OR halfword immediate
+class ORHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORHIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ immpred:$val))]>;
+
+multiclass BitwiseOrHalfwordImm
+{
+ def v8i16: ORHIVecInst<v8i16, v8i16Uns10Imm>;
+
+ def r16: ORHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
+ [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>;
+
+ // Specialized ORHI form used to promote 8-bit registers to 16-bit
+ def i8i16: ORHIInst<(outs R16C:$rT), (ins R8C:$rA, s10imm:$val),
+ [(set R16C:$rT, (or (anyext R8C:$rA),
+ i16ImmSExt10:$val))]>;
+}
+
+defm ORHI : BitwiseOrHalfwordImm;
+
+class ORIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ immpred:$val))]>;
+
+// Bitwise "or" with immediate
+multiclass BitwiseOrImm
+{
+ def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>;
+
+ def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, u10imm_i32:$val),
+ [(set R32C:$rT, (or R32C:$rA, i32ImmUns10:$val))]>;
+
+ // i16i32: Hacked version of the ORI instruction to extend 16-bit quantities
+ // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide
+ // infra "anyext 16->32" pattern.)
+ def i16i32: ORIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or (anyext R16C:$rA),
+ i32ImmSExt10:$val))]>;
+
+ // i8i32: Hacked version of the ORI instruction to extend 8-bit quantities
+ // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide
+ // infra "anyext 8->32" pattern.)
+ def i8i32: ORIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or (anyext R8C:$rA),
+ i32ImmSExt10:$val))]>;
+}
+
+defm ORI : BitwiseOrImm;
+
+// ORX: "or" across the vector: or's $rA's word slots leaving the result in
+// $rT[0], slots 1-3 are zeroed.
+//
+// FIXME: Needs to match an intrinsic pattern.
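+// Illustratively: $rT.word[0] = $rA.word[0] | $rA.word[1] | $rA.word[2] | $rA.word[3],
+// and $rT.word[1..3] = 0.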
+def ORXv4i32:
+ RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "orx\t$rT, $rA, $rB", IntegerOp,
+ []>;
+
+// XOR:
+
+class XORInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b10010010000, OOL, IOL, "xor\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class XORVecInst<ValueType vectype>:
+ XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (xor (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class XORRegInst<RegisterClass rclass>:
+ XORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (xor rclass:$rA, rclass:$rB))]>;
+
+multiclass BitwiseExclusiveOr
+{
+ def v16i8: XORVecInst<v16i8>;
+ def v8i16: XORVecInst<v8i16>;
+ def v4i32: XORVecInst<v4i32>;
+ def v2i64: XORVecInst<v2i64>;
+
+ def r128: XORRegInst<GPRC>;
+ def r64: XORRegInst<R64C>;
+ def r32: XORRegInst<R32C>;
+ def r16: XORRegInst<R16C>;
+ def r8: XORRegInst<R8C>;
+
+ // XOR instructions used to negate f32 and f64 quantities.
+
+ def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+ def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+ [/* no pattern */]>;
+
+ def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern, see fneg{32,64} */]>;
+}
+
+defm XOR : BitwiseExclusiveOr;
+
+//==----------------------------------------------------------
+
+class XORBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01100000, OOL, IOL, "xorbi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+multiclass XorByteImm
+{
+ def v16i8:
+ XORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>;
+
+ def r8:
+ XORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>;
+}
+
+defm XORBI : XorByteImm;
+
+def XORHIv8i16:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ "xorhi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (xor (v8i16 VECREG:$rA),
+ v8i16SExt10Imm:$val))]>;
+
+def XORHIr16:
+ RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "xorhi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (xor R16C:$rA, i16ImmSExt10:$val))]>;
+
+def XORIv4i32:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm_i32:$val),
+ "xori\t$rT, $rA, $val", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (xor (v4i32 VECREG:$rA),
+ v4i32SExt10Imm:$val))]>;
+
+def XORIr32:
+ RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ "xori\t$rT, $rA, $val", IntegerOp,
+ [(set R32C:$rT, (xor R32C:$rA, i32ImmSExt10:$val))]>;
+
+// NAND:
+
+class NANDInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010011000, OOL, IOL, "nand\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class NANDVecInst<ValueType vectype>:
+ NANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (vnot (and (vectype VECREG:$rA),
+ (vectype VECREG:$rB))))]>;
+class NANDRegInst<RegisterClass rclass>:
+ NANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (and rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitwiseNand
+{
+ def v16i8: NANDVecInst<v16i8>;
+ def v8i16: NANDVecInst<v8i16>;
+ def v4i32: NANDVecInst<v4i32>;
+ def v2i64: NANDVecInst<v2i64>;
+
+ def r128: NANDRegInst<GPRC>;
+ def r64: NANDRegInst<R64C>;
+ def r32: NANDRegInst<R32C>;
+ def r16: NANDRegInst<R16C>;
+ def r8: NANDRegInst<R8C>;
+}
+
+defm NAND : BitwiseNand;
+
+// NOR:
+
+class NORInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "nor\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class NORVecInst<ValueType vectype>:
+ NORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (vnot (or (vectype VECREG:$rA),
+ (vectype VECREG:$rB))))]>;
+class NORRegInst<RegisterClass rclass>:
+ NORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (or rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitwiseNor
+{
+ def v16i8: NORVecInst<v16i8>;
+ def v8i16: NORVecInst<v8i16>;
+ def v4i32: NORVecInst<v4i32>;
+ def v2i64: NORVecInst<v2i64>;
+
+ def r128: NORRegInst<GPRC>;
+ def r64: NORRegInst<R64C>;
+ def r32: NORRegInst<R32C>;
+ def r16: NORRegInst<R16C>;
+ def r8: NORRegInst<R8C>;
+}
+
+defm NOR : BitwiseNor;
+
+// Select bits:
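+// For each bit: $rT = ($rC & $rB) | (~$rC & $rA), i.e. a 1 bit in $rC selects
+// the corresponding bit from $rB and a 0 bit selects it from $rA (see the
+// register-form pattern below).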
+class SELBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC",
+ IntegerOp, pattern>;
+
+class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)),
+ (and (vnot_frag (vectype VECREG:$rC)),
+ (vectype VECREG:$rA))))]>;
+
+class SELBVecVCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (vectype VECREG:$rT),
+ (select (vectype VECREG:$rC),
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
+class SELBVecCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
+ [(set (vectype VECREG:$rT),
+ (select R32C:$rC,
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
+class SELBRegInst<RegisterClass rclass>:
+ SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
+ [(set rclass:$rT,
+ (or (and rclass:$rB, rclass:$rC),
+ (and rclass:$rA, (not rclass:$rC))))]>;
+
+class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
+ SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
+ [(set rclass:$rT,
+ (select rcond:$rC, rclass:$rB, rclass:$rA))]>;
+
+multiclass SelectBits
+{
+ def v16i8: SELBVecInst<v16i8>;
+ def v8i16: SELBVecInst<v8i16>;
+ def v4i32: SELBVecInst<v4i32>;
+ def v2i64: SELBVecInst<v2i64, vnot_conv>;
+
+ def r128: SELBRegInst<GPRC>;
+ def r64: SELBRegInst<R64C>;
+ def r32: SELBRegInst<R32C>;
+ def r16: SELBRegInst<R16C>;
+ def r8: SELBRegInst<R8C>;
+
+ def v16i8_cond: SELBVecCondInst<v16i8>;
+ def v8i16_cond: SELBVecCondInst<v8i16>;
+ def v4i32_cond: SELBVecCondInst<v4i32>;
+ def v2i64_cond: SELBVecCondInst<v2i64>;
+
+ def v16i8_vcond: SELBVecVCondInst<v16i8>;
+ def v8i16_vcond: SELBVecVCondInst<v8i16>;
+ def v4i32_vcond: SELBVecVCondInst<v4i32>;
+ def v2i64_vcond: SELBVecVCondInst<v2i64>;
+
+ def v4f32_cond:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4f32 VECREG:$rT),
+ (select (v4i32 VECREG:$rC),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)))]>;
+
+ // SELBr64_cond is defined in SPU64InstrInfo.td
+ def r32_cond: SELBRegCondInst<R32C, R32C>;
+ def f32_cond: SELBRegCondInst<R32C, R32FP>;
+ def r16_cond: SELBRegCondInst<R16C, R16C>;
+ def r8_cond: SELBRegCondInst<R8C, R8C>;
+}
+
+defm SELB : SelectBits;
+
+class SPUselbPatVec<ValueType vectype, SPUInstr inst>:
+ Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)),
+ (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
+
+def : SPUselbPatVec<v16i8, SELBv16i8>;
+def : SPUselbPatVec<v8i16, SELBv8i16>;
+def : SPUselbPatVec<v4i32, SELBv4i32>;
+def : SPUselbPatVec<v2i64, SELBv2i64>;
+
+class SPUselbPatReg<RegisterClass rclass, SPUInstr inst>:
+ Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC),
+ (inst rclass:$rA, rclass:$rB, rclass:$rC)>;
+
+def : SPUselbPatReg<R8C, SELBr8>;
+def : SPUselbPatReg<R16C, SELBr16>;
+def : SPUselbPatReg<R32C, SELBr32>;
+def : SPUselbPatReg<R64C, SELBr64>;
+
+// EQV: Equivalence (1 for each same bit, otherwise 0)
+//
+// Note: There are a lot of ways to match this bit operator and these patterns
+// attempt to be as exhaustive as possible.
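+// All of the forms below describe the same operation, e.g. for registers:
+// rT = ~(rA ^ rB) == (rA & rB) | (~rA & ~rB) == rA ^ ~rB.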
+
+class EQVInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "eqv\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class EQVVecInst<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
+ (and (vnot (vectype VECREG:$rA)),
+ (vnot (vectype VECREG:$rB)))))]>;
+
+class EQVRegInst<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or (and rclass:$rA, rclass:$rB),
+ (and (not rclass:$rA), (not rclass:$rB))))]>;
+
+class EQVVecPattern1<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (xor (vectype VECREG:$rA), (vnot (vectype VECREG:$rB))))]>;
+
+class EQVRegPattern1<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (xor rclass:$rA, (not rclass:$rB)))]>;
+
+class EQVVecPattern2<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
+ (vnot (or (vectype VECREG:$rA), (vectype VECREG:$rB)))))]>;
+
+class EQVRegPattern2<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT,
+ (or (and rclass:$rA, rclass:$rB),
+ (not (or rclass:$rA, rclass:$rB))))]>;
+
+class EQVVecPattern3<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (not (xor (vectype VECREG:$rA), (vectype VECREG:$rB))))]>;
+
+class EQVRegPattern3<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (xor rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitEquivalence
+{
+ def v16i8: EQVVecInst<v16i8>;
+ def v8i16: EQVVecInst<v8i16>;
+ def v4i32: EQVVecInst<v4i32>;
+ def v2i64: EQVVecInst<v2i64>;
+
+ def v16i8_1: EQVVecPattern1<v16i8>;
+ def v8i16_1: EQVVecPattern1<v8i16>;
+ def v4i32_1: EQVVecPattern1<v4i32>;
+ def v2i64_1: EQVVecPattern1<v2i64>;
+
+ def v16i8_2: EQVVecPattern2<v16i8>;
+ def v8i16_2: EQVVecPattern2<v8i16>;
+ def v4i32_2: EQVVecPattern2<v4i32>;
+ def v2i64_2: EQVVecPattern2<v2i64>;
+
+ def v16i8_3: EQVVecPattern3<v16i8>;
+ def v8i16_3: EQVVecPattern3<v8i16>;
+ def v4i32_3: EQVVecPattern3<v4i32>;
+ def v2i64_3: EQVVecPattern3<v2i64>;
+
+ def r128: EQVRegInst<GPRC>;
+ def r64: EQVRegInst<R64C>;
+ def r32: EQVRegInst<R32C>;
+ def r16: EQVRegInst<R16C>;
+ def r8: EQVRegInst<R8C>;
+
+ def r128_1: EQVRegPattern1<GPRC>;
+ def r64_1: EQVRegPattern1<R64C>;
+ def r32_1: EQVRegPattern1<R32C>;
+ def r16_1: EQVRegPattern1<R16C>;
+ def r8_1: EQVRegPattern1<R8C>;
+
+ def r128_2: EQVRegPattern2<GPRC>;
+ def r64_2: EQVRegPattern2<R64C>;
+ def r32_2: EQVRegPattern2<R32C>;
+ def r16_2: EQVRegPattern2<R16C>;
+ def r8_2: EQVRegPattern2<R8C>;
+
+ def r128_3: EQVRegPattern3<GPRC>;
+ def r64_3: EQVRegPattern3<R64C>;
+ def r32_3: EQVRegPattern3<R32C>;
+ def r16_3: EQVRegPattern3<R16C>;
+ def r8_3: EQVRegPattern3<R8C>;
+}
+
+defm EQV: BitEquivalence;
+
+//===----------------------------------------------------------------------===//
+// Vector shuffle...
+//===----------------------------------------------------------------------===//
+// SPUshuffle is generated in LowerVECTOR_SHUFFLE and gets replaced with SHUFB.
+// See the SPUshuffle SDNode operand above, which sets up the DAG pattern
+// matcher to emit a SHUFB when LowerVECTOR_SHUFFLE generates a node with
+// the SPUISD::SHUFB opcode.
+//===----------------------------------------------------------------------===//
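+// (Roughly, each control byte in $rC selects one byte from the 32-byte
+// concatenation $rA:$rB, with special control values producing the constants
+// 0x00, 0xFF or 0x80; see the SPU ISA for the exact control-byte encoding.)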
+
+class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
+ IntegerOp, pattern>;
+
+class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
+ SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (resultvec VECREG:$rT),
+ (SPUshuffle (resultvec VECREG:$rA),
+ (resultvec VECREG:$rB),
+ (maskvec VECREG:$rC)))]>;
+
+class SHUFBGPRCInst:
+ SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+multiclass ShuffleBytes
+{
+ def v16i8 : SHUFBVecInst<v16i8, v16i8>;
+ def v16i8_m32 : SHUFBVecInst<v16i8, v4i32>;
+ def v8i16 : SHUFBVecInst<v8i16, v16i8>;
+ def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>;
+ def v4i32 : SHUFBVecInst<v4i32, v16i8>;
+ def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>;
+ def v2i64 : SHUFBVecInst<v2i64, v16i8>;
+ def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>;
+
+ def v4f32 : SHUFBVecInst<v4f32, v16i8>;
+ def v4f32_m32 : SHUFBVecInst<v4f32, v4i32>;
+
+ def v2f64 : SHUFBVecInst<v2f64, v16i8>;
+ def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>;
+
+ def gprc : SHUFBGPRCInst;
+}
+
+defm SHUFB : ShuffleBytes;
+
+//===----------------------------------------------------------------------===//
+// Shift and rotate group:
+//===----------------------------------------------------------------------===//
+
+class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class SHLHVecInst<ValueType vectype>:
+ SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_shl (vectype VECREG:$rA), R16C:$rB))]>;
+
+multiclass ShiftLeftHalfword
+{
+ def v8i16: SHLHVecInst<v8i16>;
+ def r16: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (shl R16C:$rA, R16C:$rB))]>;
+ def r16_r32: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>;
+}
+
+defm SHLH : ShiftLeftHalfword;
+
+//===----------------------------------------------------------------------===//
+
+class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class SHLHIVecInst<ValueType vectype>:
+ SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_shl (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
+
+multiclass ShiftLeftHalfwordImm
+{
+ def v8i16: SHLHIVecInst<v8i16>;
+ def r16: SHLHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
+ [(set R16C:$rT, (shl R16C:$rA, (i16 uimm7:$val)))]>;
+}
+
+defm SHLHI : ShiftLeftHalfwordImm;
+
+def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
+ (SHLHIv8i16 VECREG:$rA, uimm7:$val)>;
+
+def : Pat<(shl R16C:$rA, (i32 uimm7:$val)),
+ (SHLHIr16 R16C:$rA, uimm7:$val)>;
+
+//===----------------------------------------------------------------------===//
+
+class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+multiclass ShiftLeftWord
+{
+ def v4i32:
+ SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_shl (v4i32 VECREG:$rA), R16C:$rB))]>;
+ def r32:
+ SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>;
+}
+
+defm SHL: ShiftLeftWord;
+
+//===----------------------------------------------------------------------===//
+
+class SHLIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+multiclass ShiftLeftWordImm
+{
+ def v4i32:
+ SHLIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_shl (v4i32 VECREG:$rA), (i32 uimm7:$val)))]>;
+
+ def r32:
+ SHLIInst<(outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val),
+ [(set R32C:$rT, (shl R32C:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm SHLI : ShiftLeftWordImm;
+
+//===----------------------------------------------------------------------===//
+// SHLQBI vec form: Note that this will shift the entire vector (the 128-bit
+// register) to the left. Vector form is here to ensure type correctness.
+//
+// The shift count is in the lowest 3 bits (29-31) of $rB, so only a bit shift
+// of 0-7 bits is actually possible.
+//
+// Note also that SHLQBI/SHLQBII are used in conjunction with SHLQBY/SHLQBYI
+// to shift i64 and i128. SHLQBI is the residual left over after shifting by
+// bytes with SHLQBY.
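+// Illustratively, a full quadword shift left by n bits can be assembled as
+// shlqby[i] by (n >> 3) bytes followed by shlqbi[i] by (n & 7) bits.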
+
+class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class SHLQBIVecInst<ValueType vectype>:
+ SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>;
+
+class SHLQBIRegInst<RegisterClass rclass>:
+ SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass ShiftLeftQuadByBits
+{
+ def v16i8: SHLQBIVecInst<v16i8>;
+ def v8i16: SHLQBIVecInst<v8i16>;
+ def v4i32: SHLQBIVecInst<v4i32>;
+ def v4f32: SHLQBIVecInst<v4f32>;
+ def v2i64: SHLQBIVecInst<v2i64>;
+ def v2f64: SHLQBIVecInst<v2f64>;
+
+ def r128: SHLQBIRegInst<GPRC>;
+}
+
+defm SHLQBI : ShiftLeftQuadByBits;
+
+// See the note above on SHLQBI. In this case, the predicate actually does the
+// enforcement, whereas with SHLQBI we have to "take it on faith."
+class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class SHLQBIIVecInst<ValueType vectype>:
+ SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bits (vectype VECREG:$rA), (i32 bitshift:$val)))]>;
+
+multiclass ShiftLeftQuadByBitsImm
+{
+ def v16i8 : SHLQBIIVecInst<v16i8>;
+ def v8i16 : SHLQBIIVecInst<v8i16>;
+ def v4i32 : SHLQBIIVecInst<v4i32>;
+ def v4f32 : SHLQBIIVecInst<v4f32>;
+ def v2i64 : SHLQBIIVecInst<v2i64>;
+ def v2f64 : SHLQBIIVecInst<v2f64>;
+}
+
+defm SHLQBII : ShiftLeftQuadByBitsImm;
+
+// SHLQBY, SHLQBYI vector forms: Shift the entire vector to the left by bytes,
+// not by bits. See notes above on SHLQBI.
+
+class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class SHLQBYVecInst<ValueType vectype>:
+ SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bytes (vectype VECREG:$rA), R32C:$rB))]>;
+
+multiclass ShiftLeftQuadBytes
+{
+ def v16i8: SHLQBYVecInst<v16i8>;
+ def v8i16: SHLQBYVecInst<v8i16>;
+ def v4i32: SHLQBYVecInst<v4i32>;
+ def v4f32: SHLQBYVecInst<v4f32>;
+ def v2i64: SHLQBYVecInst<v2i64>;
+ def v2f64: SHLQBYVecInst<v2f64>;
+ def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
+ [(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>;
+}
+
+defm SHLQBY: ShiftLeftQuadBytes;
+
+class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class SHLQBYIVecInst<ValueType vectype>:
+ SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
+
+multiclass ShiftLeftQuadBytesImm
+{
+ def v16i8: SHLQBYIVecInst<v16i8>;
+ def v8i16: SHLQBYIVecInst<v8i16>;
+ def v4i32: SHLQBYIVecInst<v4i32>;
+ def v4f32: SHLQBYIVecInst<v4f32>;
+ def v2i64: SHLQBYIVecInst<v2i64>;
+ def v2f64: SHLQBYIVecInst<v2f64>;
+ def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val),
+ [(set GPRC:$rT,
+ (SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm SHLQBYI : ShiftLeftQuadBytesImm;
+
+class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class SHLQBYBIVecInst<ValueType vectype>:
+ SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+class SHLQBYBIRegInst<RegisterClass rclass>:
+ SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass ShiftLeftQuadBytesBitCount
+{
+ def v16i8: SHLQBYBIVecInst<v16i8>;
+ def v8i16: SHLQBYBIVecInst<v8i16>;
+ def v4i32: SHLQBYBIVecInst<v4i32>;
+ def v4f32: SHLQBYBIVecInst<v4f32>;
+ def v2i64: SHLQBYBIVecInst<v2i64>;
+ def v2f64: SHLQBYBIVecInst<v2f64>;
+
+ def r128: SHLQBYBIRegInst<GPRC>;
+}
+
+defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate halfword:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class ROTHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTHVecInst<ValueType vectype>:
+ ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl VECREG:$rA, VECREG:$rB))]>;
+
+class ROTHRegInst<RegisterClass rclass>:
+ ROTHInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (rotl rclass:$rA, rclass:$rB))]>;
+
+multiclass RotateLeftHalfword
+{
+ def v8i16: ROTHVecInst<v8i16>;
+ def r16: ROTHRegInst<R16C>;
+}
+
+defm ROTH: RotateLeftHalfword;
+
+def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate halfword, immediate:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTHIVecInst<ValueType vectype>:
+ ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl VECREG:$rA, (i16 uimm7:$val)))]>;
+
+multiclass RotateLeftHalfwordImm
+{
+ def v8i16: ROTHIVecInst<v8i16>;
+ def r16: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
+ [(set R16C:$rT, (rotl R16C:$rA, (i16 uimm7:$val)))]>;
+ def r16_r32: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val),
+ [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm ROTHI: RotateLeftHalfwordImm;
+
+def : Pat<(SPUvec_rotl VECREG:$rA, (i32 uimm7:$val)),
+ (ROTHIv8i16 VECREG:$rA, imm:$val)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate word:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTVecInst<ValueType vectype>:
+ ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl (vectype VECREG:$rA), R32C:$rB))]>;
+
+class ROTRegInst<RegisterClass rclass>:
+ ROTInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [(set rclass:$rT,
+ (rotl rclass:$rA, R32C:$rB))]>;
+
+multiclass RotateLeftWord
+{
+ def v4i32: ROTVecInst<v4i32>;
+ def r32: ROTRegInst<R32C>;
+}
+
+defm ROT: RotateLeftWord;
+
+// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit or
+// 32-bit register
+def ROTr32_r16_anyext:
+ ROTInst<(outs R32C:$rT), (ins R32C:$rA, R16C:$rB),
+ [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>;
+
+def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))),
+ (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
+
+def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))),
+ (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
+
+def ROTr32_r8_anyext:
+ ROTInst<(outs R32C:$rT), (ins R32C:$rA, R8C:$rB),
+ [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>;
+
+def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))),
+ (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
+
+def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))),
+ (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate word, immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>:
+ ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl (vectype VECREG:$rA), (inttype pred:$val)))]>;
+
+class ROTIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>:
+ ROTIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [(set rclass:$rT, (rotl rclass:$rA, (inttype pred:$val)))]>;
+
+multiclass RotateLeftWordImm
+{
+ def v4i32: ROTIVecInst<v4i32, u7imm_i32, i32, uimm7>;
+ def v4i32_i16: ROTIVecInst<v4i32, u7imm, i16, uimm7>;
+ def v4i32_i8: ROTIVecInst<v4i32, u7imm_i8, i8, uimm7>;
+
+ def r32: ROTIRegInst<R32C, u7imm_i32, i32, uimm7>;
+ def r32_i16: ROTIRegInst<R32C, u7imm, i16, uimm7>;
+ def r32_i8: ROTIRegInst<R32C, u7imm_i8, i8, uimm7>;
+}
+
+defm ROTI : RotateLeftWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad by byte (count)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTQBYVecInst<ValueType vectype>:
+ ROTQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUrotbytes_left (vectype VECREG:$rA), R32C:$rB))]>;
+
+multiclass RotateQuadLeftByBytes
+{
+ def v16i8: ROTQBYVecInst<v16i8>;
+ def v8i16: ROTQBYVecInst<v8i16>;
+ def v4i32: ROTQBYVecInst<v4i32>;
+ def v4f32: ROTQBYVecInst<v4f32>;
+ def v2i64: ROTQBYVecInst<v2i64>;
+ def v2f64: ROTQBYVecInst<v2f64>;
+}
+
+defm ROTQBY: RotateQuadLeftByBytes;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad by byte (count), immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTQBYIVecInst<ValueType vectype>:
+ ROTQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUrotbytes_left (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
+
+multiclass RotateQuadByBytesImm
+{
+ def v16i8: ROTQBYIVecInst<v16i8>;
+ def v8i16: ROTQBYIVecInst<v8i16>;
+ def v4i32: ROTQBYIVecInst<v4i32>;
+ def v4f32: ROTQBYIVecInst<v4f32>;
+ def v2i64: ROTQBYIVecInst<v2i64>;
+ def v2f64: ROTQBYIVecInst<v2f64>;
+}
+
+defm ROTQBYI: RotateQuadByBytesImm;
+
+// See ROTQBY note above.
+class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00110011100, OOL, IOL,
+ "rotqbybi\t$rT, $rA, $shift",
+ RotateShift, pattern>;
+
+class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
+ ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
+ [(set (vectype VECREG:$rT),
+ (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>;
+
+multiclass RotateQuadByBytesByBitshift {
+ def v16i8_r32: ROTQBYBIVecInst<v16i8, R32C>;
+ def v8i16_r32: ROTQBYBIVecInst<v8i16, R32C>;
+ def v4i32_r32: ROTQBYBIVecInst<v4i32, R32C>;
+ def v2i64_r32: ROTQBYBIVecInst<v2i64, R32C>;
+}
+
+defm ROTQBYBI : RotateQuadByBytesByBitshift;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// See ROTQBY note above.
+//
+// Assume that the user of this instruction knows to shift the rotate count
+// into bit 29
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTQBIVecInst<ValueType vectype>:
+ ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern yet */]>;
+
+class ROTQBIRegInst<RegisterClass rclass>:
+ ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern yet */]>;
+
+multiclass RotateQuadByBitCount
+{
+ def v16i8: ROTQBIVecInst<v16i8>;
+ def v8i16: ROTQBIVecInst<v8i16>;
+ def v4i32: ROTQBIVecInst<v4i32>;
+ def v2i64: ROTQBIVecInst<v2i64>;
+
+ def r128: ROTQBIRegInst<GPRC>;
+ def r64: ROTQBIRegInst<R64C>;
+}
+
+defm ROTQBI: RotateQuadByBitCount;
+
+class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
+ [/* no pattern yet */]>;
+
+class ROTQBIIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQBIIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern yet */]>;
+
+multiclass RotateQuadByBitCountImm
+{
+ def v16i8: ROTQBIIVecInst<v16i8, u7imm_i32, i32, uimm7>;
+ def v8i16: ROTQBIIVecInst<v8i16, u7imm_i32, i32, uimm7>;
+ def v4i32: ROTQBIIVecInst<v4i32, u7imm_i32, i32, uimm7>;
+ def v2i64: ROTQBIIVecInst<v2i64, u7imm_i32, i32, uimm7>;
+
+ def r128: ROTQBIIRegInst<GPRC, u7imm_i32, i32, uimm7>;
+ def r64: ROTQBIIRegInst<R64C, u7imm_i32, i32, uimm7>;
+}
+
+defm ROTQBII : RotateQuadByBitCountImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// ROTHM v8i16 form:
+// NOTE(1): No vector rotate is generated by the C/C++ frontend (today),
+// so this only matches a synthetically generated/lowered code
+// fragment.
+// NOTE(2): $rB must be negated before the right rotate!
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
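+// The patterns below negate the shift amount with SFIr32 (subtract-from-
+// immediate, i.e. 0 - $rB) before feeding it to the instruction, which is how
+// the logical right shift is expressed as a rotate-and-mask.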
+
+class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+def ROTHMv8i16:
+ ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R32C:$rB),
+ (ROTHMv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R16C:$rB),
+ (ROTHMv8i16 VECREG:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R8C:$rB),
+ (ROTHMv8i16 VECREG:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>;
+
+// ROTHM r16 form: Rotate 16-bit quantity to the right, zero-filling at the left.
+// Note: This instruction doesn't match a pattern directly because $rB must be
+// negated for the instruction to work. Hence the patterns below the instruction.
+
+def ROTHMr16:
+ ROTHMInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated! */]>;
+
+def : Pat<(srl R16C:$rA, R32C:$rB),
+ (ROTHMr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(srl R16C:$rA, R16C:$rB),
+ (ROTHMr16 R16C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(srl R16C:$rA, R8C:$rB),
+ (ROTHMr16 R16C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>;
+
+// ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is
+// that the immediate can be complemented, so that the user doesn't have to
+// worry about it.
+
+class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+def ROTHMIv8i16:
+ ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
+ [/* no pattern */]>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
+
+def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
+
+def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
+
+def ROTHMIr16:
+ ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val),
+ [/* no pattern */]>;
+
+def: Pat<(srl R16C:$rA, (i32 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, uimm7:$val)>;
+
+def: Pat<(srl R16C:$rA, (i16 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, uimm7:$val)>;
+
+def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, uimm7:$val)>;
+
+// ROTM v4i32 form: See the ROTHM v8i16 comments.
+class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+def ROTMv4i32:
+ ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_srl VECREG:$rA, R32C:$rB),
+ (ROTMv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(SPUvec_srl VECREG:$rA, R16C:$rB),
+ (ROTMv4i32 VECREG:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(SPUvec_srl VECREG:$rA, R8C:$rB),
+ (ROTMv4i32 VECREG:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+def ROTMr32:
+ ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(srl R32C:$rA, R32C:$rB),
+ (ROTMr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(srl R32C:$rA, R16C:$rB),
+ (ROTMr32 R32C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(srl R32C:$rA, R8C:$rB),
+ (ROTMr32 R32C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+// ROTMI v4i32 form: See the comment for ROTHM v8i16.
+def ROTMIv4i32:
+ RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ "rotmi\t$rT, $rA, $val", RotateShift,
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>;
+
+def : Pat<(SPUvec_srl VECREG:$rA, (i16 uimm7:$val)),
+ (ROTMIv4i32 VECREG:$rA, uimm7:$val)>;
+
+def : Pat<(SPUvec_srl VECREG:$rA, (i8 uimm7:$val)),
+ (ROTMIv4i32 VECREG:$rA, uimm7:$val)>;
+
+// ROTMI r32 form: the rotNeg7imm operand knows how to complement (negate) the
+// immediate value.
+def ROTMIr32:
+ RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
+ "rotmi\t$rT, $rA, $val", RotateShift,
+ [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>;
+
+def : Pat<(srl R32C:$rA, (i16 imm:$val)),
+ (ROTMIr32 R32C:$rA, uimm7:$val)>;
+
+def : Pat<(srl R32C:$rA, (i8 imm:$val)),
+ (ROTMIr32 R32C:$rA, uimm7:$val)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// ROTQMBY: The vector form exists merely so that type checking succeeds when
+// it is used in an instruction pattern. This instruction assumes that the
+// user knows to negate $rB.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTQMBYVecInst<ValueType vectype>:
+ ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern, $rB must be negated */]>;
+
+class ROTQMBYRegInst<RegisterClass rclass>:
+ ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass RotateQuadBytes
+{
+ def v16i8: ROTQMBYVecInst<v16i8>;
+ def v8i16: ROTQMBYVecInst<v8i16>;
+ def v4i32: ROTQMBYVecInst<v4i32>;
+ def v2i64: ROTQMBYVecInst<v2i64>;
+
+ def r128: ROTQMBYRegInst<GPRC>;
+ def r64: ROTQMBYRegInst<R64C>;
+}
+
+defm ROTQMBY : RotateQuadBytes;
+
+class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTQMBYIVecInst<ValueType vectype>:
+ ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern */]>;
+
+// 128-bit zero extension form:
+class ROTQMBYIZExtInst<RegisterClass rclass, Operand optype, PatLeaf pred>:
+ ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern */]>;
+
+multiclass RotateQuadBytesImm
+{
+ def v16i8: ROTQMBYIVecInst<v16i8>;
+ def v8i16: ROTQMBYIVecInst<v8i16>;
+ def v4i32: ROTQMBYIVecInst<v4i32>;
+ def v2i64: ROTQMBYIVecInst<v2i64>;
+
+ def r128: ROTQMBYIRegInst<GPRC, rotNeg7imm, i32, uimm7>;
+ def r64: ROTQMBYIRegInst<R64C, rotNeg7imm, i32, uimm7>;
+
+ def r128_zext_r8: ROTQMBYIZExtInst<R8C, rotNeg7imm, uimm7>;
+ def r128_zext_r16: ROTQMBYIZExtInst<R16C, rotNeg7imm, uimm7>;
+ def r128_zext_r32: ROTQMBYIZExtInst<R32C, rotNeg7imm, uimm7>;
+ def r128_zext_r64: ROTQMBYIZExtInst<R64C, rotNeg7imm, uimm7>;
+}
+
+defm ROTQMBYI : RotateQuadBytesImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate right and mask by bit count
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTQMBYBIVecInst<ValueType vectype>:
+ ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern, */]>;
+
+multiclass RotateMaskQuadByBitCount
+{
+ def v16i8: ROTQMBYBIVecInst<v16i8>;
+ def v8i16: ROTQMBYBIVecInst<v8i16>;
+ def v4i32: ROTQMBYBIVecInst<v4i32>;
+ def v2i64: ROTQMBYBIVecInst<v2i64>;
+}
+
+defm ROTQMBYBI: RotateMaskQuadByBitCount;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad and mask by bits
+// Note that the rotate amount has to be negated
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB",
+ RotateShift, pattern>;
+
+class ROTQMBIVecInst<ValueType vectype>:
+ ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+class ROTQMBIRegInst<RegisterClass rclass>:
+ ROTQMBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass RotateMaskQuadByBits
+{
+ def v16i8: ROTQMBIVecInst<v16i8>;
+ def v8i16: ROTQMBIVecInst<v8i16>;
+ def v4i32: ROTQMBIVecInst<v4i32>;
+ def v2i64: ROTQMBIVecInst<v2i64>;
+
+ def r128: ROTQMBIRegInst<GPRC>;
+ def r64: ROTQMBIRegInst<R64C>;
+}
+
+defm ROTQMBI: RotateMaskQuadByBits;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad and mask by bits, immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTQMBIIVecInst<ValueType vectype>:
+ ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+class ROTQMBIIRegInst<RegisterClass rclass>:
+ ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+multiclass RotateMaskQuadByBitsImm
+{
+ def v16i8: ROTQMBIIVecInst<v16i8>;
+ def v8i16: ROTQMBIIVecInst<v8i16>;
+ def v4i32: ROTQMBIIVecInst<v4i32>;
+ def v2i64: ROTQMBIIVecInst<v2i64>;
+
+ def r128: ROTQMBIIRegInst<GPRC>;
+ def r64: ROTQMBIIRegInst<R64C>;
+}
+
+defm ROTQMBII: RotateMaskQuadByBitsImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
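+// ROTMAH/ROTMA: rotate and mask algebraic (arithmetic shift right), halfword
+// and word forms. As above, $rB must be negated; see the patterns below.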
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def ROTMAHv8i16:
+ RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ "rotmah\t$rT, $rA, $rB", RotateShift,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_sra VECREG:$rA, R32C:$rB),
+ (ROTMAHv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(SPUvec_sra VECREG:$rA, R16C:$rB),
+ (ROTMAHv8i16 VECREG:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(SPUvec_sra VECREG:$rA, R8C:$rB),
+ (ROTMAHv8i16 VECREG:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+def ROTMAHr16:
+ RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ "rotmah\t$rT, $rA, $rB", RotateShift,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(sra R16C:$rA, R32C:$rB),
+ (ROTMAHr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(sra R16C:$rA, R16C:$rB),
+ (ROTMAHr16 R16C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(sra R16C:$rA, R8C:$rB),
+ (ROTMAHr16 R16C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+def ROTMAHIv8i16:
+ RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
+ "rotmahi\t$rT, $rA, $val", RotateShift,
+ [(set (v8i16 VECREG:$rT),
+ (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)),
+ (ROTMAHIv8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val))>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)),
+ (ROTMAHIv8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val))>;
+
+def ROTMAHIr16:
+ RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
+ "rotmahi\t$rT, $rA, $val", RotateShift,
+ [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>;
+
+def : Pat<(sra R16C:$rA, (i32 imm:$val)),
+ (ROTMAHIr16 R16C:$rA, uimm7:$val)>;
+
+def : Pat<(sra R16C:$rA, (i8 imm:$val)),
+ (ROTMAHIr16 R16C:$rA, uimm7:$val)>;
+
+def ROTMAv4i32:
+ RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ "rotma\t$rT, $rA, $rB", RotateShift,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_sra VECREG:$rA, R32C:$rB),
+ (ROTMAv4i32 (v4i32 VECREG:$rA), (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(SPUvec_sra VECREG:$rA, R16C:$rB),
+ (ROTMAv4i32 (v4i32 VECREG:$rA),
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(SPUvec_sra VECREG:$rA, R8C:$rB),
+ (ROTMAv4i32 (v4i32 VECREG:$rA),
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+def ROTMAr32:
+ RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ "rotma\t$rT, $rA, $rB", RotateShift,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(sra R32C:$rA, R32C:$rB),
+ (ROTMAr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(sra R32C:$rA, R16C:$rB),
+ (ROTMAr32 R32C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(sra R32C:$rA, R8C:$rB),
+ (ROTMAr32 R32C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011110000, OOL, IOL,
+ "rotmai\t$rT, $rA, $val",
+ RotateShift, pattern>;
+
+class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>;
+
+class ROTMAIRegInst<RegisterClass rclass, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val),
+ [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>;
+
+multiclass RotateMaskAlgebraicImm {
+ def v2i64_i32 : ROTMAIVecInst<v2i64, rotNeg7imm, i32>;
+ def v4i32_i32 : ROTMAIVecInst<v4i32, rotNeg7imm, i32>;
+ def r64_i32 : ROTMAIRegInst<R64C, rotNeg7imm, i32>;
+ def r32_i32 : ROTMAIRegInst<R32C, rotNeg7imm, i32>;
+}
+
+defm ROTMAI : RotateMaskAlgebraicImm;
+
+//===----------------------------------------------------------------------===//
+// Branch and conditionals:
+//===----------------------------------------------------------------------===//
+
+let isTerminator = 1, isBarrier = 1 in {
+ // Halt If Equal (r32 preferred slot only, no vector form)
+ def HEQr32:
+ RRForm_3<0b00011011110, (outs), (ins R32C:$rA, R32C:$rB),
+ "heq\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HEQIr32 :
+ RI10Form_2<0b11111110, (outs), (ins R32C:$rA, s10imm:$val),
+ "heqi\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+
+ // HGT/HGTI: These instructions use signed arithmetic for the comparison,
+ // contrasting with HLGT/HLGTI, which use unsigned comparison:
+ def HGTr32:
+ RRForm_3<0b00011010010, (outs), (ins R32C:$rA, R32C:$rB),
+ "hgt\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HGTIr32:
+ RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val),
+ "hgti\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HLGTr32:
+ RRForm_3<0b00011011010, (outs), (ins R32C:$rA, R32C:$rB),
+ "hlgt\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HLGTIr32:
+ RI10Form_2<0b11111010, (outs), (ins R32C:$rA, s10imm:$val),
+ "hlgti\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+}
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Comparison operators for i8, i16 and i32:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class CEQBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualByte
+{
+ def v16i8 :
+ CEQBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA),
+ (v16i8 VECREG:$rB)))]>;
+
+ def r8 :
+ CEQBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (seteq R8C:$rA, R8C:$rB))]>;
+}
+
+class CEQBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01111110, OOL, IOL, "ceqbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualByteImm
+{
+ def v16i8 :
+ CEQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CEQBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (seteq R8C:$rA, immSExt8:$val))]>;
+}
+
+class CEQHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010011110, OOL, IOL, "ceqh\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualHalfword
+{
+ def v8i16 : CEQHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CEQHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (seteq R16C:$rA, R16C:$rB))]>;
+}
+
+class CEQHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10111110, OOL, IOL, "ceqhi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualHalfwordImm
+{
+ def v8i16 : CEQHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (seteq (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CEQHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (seteq R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CEQInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000011110, OOL, IOL, "ceq\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualWord
+{
+ def v4i32 : CEQInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CEQInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>;
+}
+
+class CEQIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00111110, OOL, IOL, "ceqi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualWordImm
+{
+ def v4i32 : CEQIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (seteq (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CEQIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>;
+}
+
+class CGTBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001010010, OOL, IOL, "cgtb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrByte
+{
+ def v16i8 :
+ CGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA),
+ (v16i8 VECREG:$rB)))]>;
+
+ def r8 :
+ CGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (setgt R8C:$rA, R8C:$rB))]>;
+}
+
+class CGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01110010, OOL, IOL, "cgtbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrByteImm
+{
+ def v16i8 :
+ CGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (setgt R8C:$rA, immSExt8:$val))]>;
+}
+
+class CGTHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010010010, OOL, IOL, "cgth\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrHalfword
+{
+ def v8i16 : CGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (setgt R16C:$rA, R16C:$rB))]>;
+}
+
+class CGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10110010, OOL, IOL, "cgthi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrHalfwordImm
+{
+ def v8i16 : CGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (setgt (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (setgt R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CGTInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000010010, OOL, IOL, "cgt\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrWord
+{
+ def v4i32 : CGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (setgt R32C:$rA, R32C:$rB))]>;
+}
+
+class CGTIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00110010, OOL, IOL, "cgti\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrWordImm
+{
+ def v4i32 : CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence:
+ def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
+}
+
+class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001011010, OOL, IOL, "clgtb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrByte
+{
+ def v16i8 :
+ CLGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA),
+ (v16i8 VECREG:$rB)))]>;
+
+ def r8 :
+ CLGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (setugt R8C:$rA, R8C:$rB))]>;
+}
+
+class CLGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01111010, OOL, IOL, "clgtbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrByteImm
+{
+ def v16i8 :
+ CLGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CLGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (setugt R8C:$rA, immSExt8:$val))]>;
+}
+
+class CLGTHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010011010, OOL, IOL, "clgth\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrHalfword
+{
+ def v8i16 : CLGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CLGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (setugt R16C:$rA, R16C:$rB))]>;
+}
+
+class CLGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10111010, OOL, IOL, "clgthi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrHalfwordImm
+{
+ def v8i16 : CLGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (setugt (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CLGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (setugt R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CLGTInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000011010, OOL, IOL, "clgt\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrWord
+{
+ def v4i32 : CLGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (setugt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CLGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (setugt R32C:$rA, R32C:$rB))]>;
+}
+
+class CLGTIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00111010, OOL, IOL, "clgti\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrWordImm
+{
+ def v4i32 : CLGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setugt (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CLGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (setugt R32C:$rA, i32ImmSExt10:$val))]>;
+}
+
+defm CEQB : CmpEqualByte;
+defm CEQBI : CmpEqualByteImm;
+defm CEQH : CmpEqualHalfword;
+defm CEQHI : CmpEqualHalfwordImm;
+defm CEQ : CmpEqualWord;
+defm CEQI : CmpEqualWordImm;
+defm CGTB : CmpGtrByte;
+defm CGTBI : CmpGtrByteImm;
+defm CGTH : CmpGtrHalfword;
+defm CGTHI : CmpGtrHalfwordImm;
+defm CGT : CmpGtrWord;
+defm CGTI : CmpGtrWordImm;
+defm CLGTB : CmpLGtrByte;
+defm CLGTBI : CmpLGtrByteImm;
+defm CLGTH : CmpLGtrHalfword;
+defm CLGTHI : CmpLGtrHalfwordImm;
+defm CLGT : CmpLGtrWord;
+defm CLGTI : CmpLGtrWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// For SETCC primitives not supported above (setlt, setle, setge, etc.)
+// define a pattern to generate the right code, as a binary operator
+// (in a manner of speaking.)
+//
+// Notes:
+// 1. This only matches the setcc set of conditionals. Special pattern
+// matching is used for select conditionals.
+//
+// 2. The "DAG" versions of these classes are almost exclusively used for
+// i64 comparisons. See the tblgen fundamentals documentation for what
+// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern
+// class for where ResultInstrs originates.
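+//
+// 3. As a sketch of the compositions defined below (derived from the
+// patterns themselves, not from the ISA manual): setne is built as
+// xor(ceq, -1), setge as or(cgt, ceq), setlt as nor(cgt, ceq), and
+// setle as xor(cgt, all-ones); the unsigned variants substitute clgt
+// for cgt.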
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class SETCCNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr xorinst, SPUInstr cmpare>:
+ Pat<(cond rclass:$rA, rclass:$rB),
+ (xorinst (cmpare rclass:$rA, rclass:$rB), (inttype -1))>;
+
+class SETCCNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ PatLeaf immpred, SPUInstr xorinst, SPUInstr cmpare>:
+ Pat<(cond rclass:$rA, (inttype immpred:$imm)),
+ (xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>;
+
+def : SETCCNegCondReg<setne, R8C, i8, XORBIr8, CEQBr8>;
+def : SETCCNegCondImm<setne, R8C, i8, immSExt8, XORBIr8, CEQBIr8>;
+
+def : SETCCNegCondReg<setne, R16C, i16, XORHIr16, CEQHr16>;
+def : SETCCNegCondImm<setne, R16C, i16, i16ImmSExt10, XORHIr16, CEQHIr16>;
+
+def : SETCCNegCondReg<setne, R32C, i32, XORIr32, CEQr32>;
+def : SETCCNegCondImm<setne, R32C, i32, i32ImmSExt10, XORIr32, CEQIr32>;
+
+class SETCCBinOpReg<PatFrag cond, RegisterClass rclass,
+ SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
+ Pat<(cond rclass:$rA, rclass:$rB),
+ (binop (cmpOp1 rclass:$rA, rclass:$rB),
+ (cmpOp2 rclass:$rA, rclass:$rB))>;
+
+class SETCCBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
+ ValueType immtype,
+ SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
+ Pat<(cond rclass:$rA, (immtype immpred:$imm)),
+ (binop (cmpOp1 rclass:$rA, (immtype immpred:$imm)),
+ (cmpOp2 rclass:$rA, (immtype immpred:$imm)))>;
+
+def : SETCCBinOpReg<setge, R8C, ORr8, CGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setge, R8C, immSExt8, i8, ORr8, CGTBIr8, CEQBIr8>;
+def : SETCCBinOpReg<setlt, R8C, NORr8, CGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setlt, R8C, immSExt8, i8, NORr8, CGTBIr8, CEQBIr8>;
+def : Pat<(setle R8C:$rA, R8C:$rB),
+ (XORBIr8 (CGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
+def : Pat<(setle R8C:$rA, immU8:$imm),
+ (XORBIr8 (CGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
+
+def : SETCCBinOpReg<setge, R16C, ORr16, CGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setge, R16C, i16ImmSExt10, i16,
+ ORr16, CGTHIr16, CEQHIr16>;
+def : SETCCBinOpReg<setlt, R16C, NORr16, CGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setlt, R16C, i16ImmSExt10, i16, NORr16, CGTHIr16, CEQHIr16>;
+def : Pat<(setle R16C:$rA, R16C:$rB),
+ (XORHIr16 (CGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
+def : Pat<(setle R16C:$rA, i16ImmSExt10:$imm),
+ (XORHIr16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
+
+def : SETCCBinOpReg<setge, R32C, ORr32, CGTr32, CEQr32>;
+def : SETCCBinOpImm<setge, R32C, i32ImmSExt10, i32,
+ ORr32, CGTIr32, CEQIr32>;
+def : SETCCBinOpReg<setlt, R32C, NORr32, CGTr32, CEQr32>;
+def : SETCCBinOpImm<setlt, R32C, i32ImmSExt10, i32, NORr32, CGTIr32, CEQIr32>;
+def : Pat<(setle R32C:$rA, R32C:$rB),
+ (XORIr32 (CGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
+def : Pat<(setle R32C:$rA, i32ImmSExt10:$imm),
+ (XORIr32 (CGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
+
+def : SETCCBinOpReg<setuge, R8C, ORr8, CLGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setuge, R8C, immSExt8, i8, ORr8, CLGTBIr8, CEQBIr8>;
+def : SETCCBinOpReg<setult, R8C, NORr8, CLGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setult, R8C, immSExt8, i8, NORr8, CLGTBIr8, CEQBIr8>;
+def : Pat<(setule R8C:$rA, R8C:$rB),
+ (XORBIr8 (CLGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
+def : Pat<(setule R8C:$rA, immU8:$imm),
+ (XORBIr8 (CLGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
+
+def : SETCCBinOpReg<setuge, R16C, ORr16, CLGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setuge, R16C, i16ImmSExt10, i16,
+ ORr16, CLGTHIr16, CEQHIr16>;
+def : SETCCBinOpReg<setult, R16C, NORr16, CLGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setult, R16C, i16ImmSExt10, i16, NORr16,
+ CLGTHIr16, CEQHIr16>;
+def : Pat<(setule R16C:$rA, R16C:$rB),
+ (XORHIr16 (CLGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
+def : Pat<(setule R16C:$rA, i16ImmSExt10:$imm),
+ (XORHIr16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
+
+def : SETCCBinOpReg<setuge, R32C, ORr32, CLGTr32, CEQr32>;
+def : SETCCBinOpImm<setuge, R32C, i32ImmSExt10, i32,
+ ORr32, CLGTIr32, CEQIr32>;
+def : SETCCBinOpReg<setult, R32C, NORr32, CLGTr32, CEQr32>;
+def : SETCCBinOpImm<setult, R32C, i32ImmSExt10, i32, NORr32, CLGTIr32, CEQIr32>;
+def : Pat<(setule R32C:$rA, R32C:$rB),
+ (XORIr32 (CLGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
+def : Pat<(setule R32C:$rA, i32ImmSExt10:$imm),
+ (XORIr32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// select conditional patterns:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class SELECTNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr selinstr, SPUInstr cmpare>:
+ Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rTrue, rclass:$rFalse,
+ (cmpare rclass:$rA, rclass:$rB))>;
+
+class SELECTNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ PatLeaf immpred, SPUInstr selinstr, SPUInstr cmpare>:
+ Pat<(select (inttype (cond rclass:$rA, immpred:$imm)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rTrue, rclass:$rFalse,
+ (cmpare rclass:$rA, immpred:$imm))>;
+
+def : SELECTNegCondReg<setne, R8C, i8, SELBr8, CEQBr8>;
+def : SELECTNegCondImm<setne, R8C, i8, immSExt8, SELBr8, CEQBIr8>;
+def : SELECTNegCondReg<setle, R8C, i8, SELBr8, CGTBr8>;
+def : SELECTNegCondImm<setle, R8C, i8, immSExt8, SELBr8, CGTBIr8>;
+def : SELECTNegCondReg<setule, R8C, i8, SELBr8, CLGTBr8>;
+def : SELECTNegCondImm<setule, R8C, i8, immU8, SELBr8, CLGTBIr8>;
+
+def : SELECTNegCondReg<setne, R16C, i16, SELBr16, CEQHr16>;
+def : SELECTNegCondImm<setne, R16C, i16, i16ImmSExt10, SELBr16, CEQHIr16>;
+def : SELECTNegCondReg<setle, R16C, i16, SELBr16, CGTHr16>;
+def : SELECTNegCondImm<setle, R16C, i16, i16ImmSExt10, SELBr16, CGTHIr16>;
+def : SELECTNegCondReg<setule, R16C, i16, SELBr16, CLGTHr16>;
+def : SELECTNegCondImm<setule, R16C, i16, i16ImmSExt10, SELBr16, CLGTHIr16>;
+
+def : SELECTNegCondReg<setne, R32C, i32, SELBr32, CEQr32>;
+def : SELECTNegCondImm<setne, R32C, i32, i32ImmSExt10, SELBr32, CEQIr32>;
+def : SELECTNegCondReg<setle, R32C, i32, SELBr32, CGTr32>;
+def : SELECTNegCondImm<setle, R32C, i32, i32ImmSExt10, SELBr32, CGTIr32>;
+def : SELECTNegCondReg<setule, R32C, i32, SELBr32, CLGTr32>;
+def : SELECTNegCondImm<setule, R32C, i32, i32ImmSExt10, SELBr32, CLGTIr32>;
+
+class SELECTBinOpReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
+ SPUInstr cmpOp2>:
+ Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rFalse, rclass:$rTrue,
+ (binop (cmpOp1 rclass:$rA, rclass:$rB),
+ (cmpOp2 rclass:$rA, rclass:$rB)))>;
+
+class SELECTBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
+ ValueType inttype,
+ SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
+ SPUInstr cmpOp2>:
+ Pat<(select (inttype (cond rclass:$rA, (inttype immpred:$imm))),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rFalse, rclass:$rTrue,
+ (binop (cmpOp1 rclass:$rA, (inttype immpred:$imm)),
+ (cmpOp2 rclass:$rA, (inttype immpred:$imm))))>;
+
+def : SELECTBinOpReg<setge, R8C, i8, SELBr8, ORr8, CGTBr8, CEQBr8>;
+def : SELECTBinOpImm<setge, R8C, immSExt8, i8,
+ SELBr8, ORr8, CGTBIr8, CEQBIr8>;
+
+def : SELECTBinOpReg<setge, R16C, i16, SELBr16, ORr16, CGTHr16, CEQHr16>;
+def : SELECTBinOpImm<setge, R16C, i16ImmSExt10, i16,
+ SELBr16, ORr16, CGTHIr16, CEQHIr16>;
+
+def : SELECTBinOpReg<setge, R32C, i32, SELBr32, ORr32, CGTr32, CEQr32>;
+def : SELECTBinOpImm<setge, R32C, i32ImmSExt10, i32,
+ SELBr32, ORr32, CGTIr32, CEQIr32>;
+
+def : SELECTBinOpReg<setuge, R8C, i8, SELBr8, ORr8, CLGTBr8, CEQBr8>;
+def : SELECTBinOpImm<setuge, R8C, immSExt8, i8,
+ SELBr8, ORr8, CLGTBIr8, CEQBIr8>;
+
+def : SELECTBinOpReg<setuge, R16C, i16, SELBr16, ORr16, CLGTHr16, CEQHr16>;
+def : SELECTBinOpImm<setuge, R16C, i16ImmUns10, i16,
+ SELBr16, ORr16, CLGTHIr16, CEQHIr16>;
+
+def : SELECTBinOpReg<setuge, R32C, i32, SELBr32, ORr32, CLGTr32, CEQr32>;
+def : SELECTBinOpImm<setuge, R32C, i32ImmUns10, i32,
+ SELBr32, ORr32, CLGTIr32, CEQIr32>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+let isCall = 1,
+ // All calls clobber the non-callee-saved registers:
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9,
+ R10,R11,R12,R13,R14,R15,R16,R17,R18,R19,
+ R20,R21,R22,R23,R24,R25,R26,R27,R28,R29,
+ R30,R31,R32,R33,R34,R35,R36,R37,R38,R39,
+ R40,R41,R42,R43,R44,R45,R46,R47,R48,R49,
+ R50,R51,R52,R53,R54,R55,R56,R57,R58,R59,
+ R60,R61,R62,R63,R64,R65,R66,R67,R68,R69,
+ R70,R71,R72,R73,R74,R75,R76,R77,R78,R79],
+ // All of these instructions use $lr (aka $0)
+ Uses = [R0] in {
+ // Branch relative and set link: Used if we actually know that the target
+ // is within [-32768, 32767] bytes of the call site
+ def BRSL:
+ BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func, variable_ops),
+ "brsl\t$$lr, $func",
+ [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>;
+
+ // Branch absolute and set link: Used if we actually know that the target
+ // is an absolute address
+ def BRASL:
+ BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops),
+ "brasl\t$$lr, $func",
+ [(SPUcall (SPUaform tglobaladdr:$func, 0))]>;
+
+ // Branch indirect and set link if external data. These instructions are not
+ // generated directly; they are matched via an intrinsic:
+ def BISLED_00: BISLEDForm<0b11, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_E0: BISLEDForm<0b10, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_0D: BISLEDForm<0b01, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_ED: BISLEDForm<0b00, "bisled\t$$lr, $func", [/* empty pattern */]>;
+
+ // Branch indirect and set link. This is the "X-form" address version of a
+ // function call
+ def BISL:
+ BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>;
+}
+
+// Support calls to external symbols:
+def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)),
+ (BRSL texternalsym:$func)>;
+
+def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)),
+ (BRASL texternalsym:$func)>;
+
+// Unconditional branches:
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+ def BR :
+ UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
+ "br\t$dest",
+ [(br bb:$dest)]>;
+
+ // Unconditional, absolute address branch
+ def BRA:
+ UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
+ "bra\t$dest",
+ [/* no pattern */]>;
+
+ // Indirect branch
+ def BI:
+ BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+
+ // Conditional branches:
+ class BRNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest",
+ BranchResolv, pattern>;
+
+ class BRNZRegInst<RegisterClass rclass>:
+ BRNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
+
+ class BRNZVecInst<ValueType vectype>:
+ BRNZInst<(ins VECREG:$rCond, brtarget:$dest),
+ [(brcond (vectype VECREG:$rCond), bb:$dest)]>;
+
+ multiclass BranchNotZero {
+ def v4i32 : BRNZVecInst<v4i32>;
+ def r32 : BRNZRegInst<R32C>;
+ }
+
+ defm BRNZ : BranchNotZero;
+
+ class BRZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest",
+ BranchResolv, pattern>;
+
+ class BRZRegInst<RegisterClass rclass>:
+ BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRZVecInst<ValueType vectype>:
+ BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZero {
+ def v4i32: BRZVecInst<v4i32>;
+ def r32: BRZRegInst<R32C>;
+ }
+
+ defm BRZ: BranchZero;
+
+ // Note: LLVM doesn't generate conditional indirect branches; otherwise these
+ // would be useful:
+ /*
+ class BINZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>;
+
+ class BINZRegInst<RegisterClass rclass>:
+ BINZInst<(ins rclass:$rA, brtarget:$dest),
+ [(brcond rclass:$rA, R32C:$dest)]>;
+
+ class BINZVecInst<ValueType vectype>:
+ BINZInst<(ins VECREG:$rA, R32C:$dest),
+ [(brcond (vectype VECREG:$rA), R32C:$dest)]>;
+
+ multiclass BranchNotZeroIndirect {
+ def v4i32: BINZVecInst<v4i32>;
+ def r32: BINZRegInst<R32C>;
+ }
+
+ defm BINZ: BranchNotZeroIndirect;
+
+ class BIZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>;
+
+ class BIZRegInst<RegisterClass rclass>:
+ BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>;
+
+ class BIZVecInst<ValueType vectype>:
+ BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>;
+
+ multiclass BranchZeroIndirect {
+ def v4i32: BIZVecInst<v4i32>;
+ def r32: BIZRegInst<R32C>;
+ }
+
+ defm BIZ: BranchZeroIndirect;
+ */
+
+ class BRHNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv,
+ pattern>;
+
+ class BRHNZRegInst<RegisterClass rclass>:
+ BRHNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
+
+ class BRHNZVecInst<ValueType vectype>:
+ BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchNotZeroHalfword {
+ def v8i16: BRHNZVecInst<v8i16>;
+ def r16: BRHNZRegInst<R16C>;
+ }
+
+ defm BRHNZ: BranchNotZeroHalfword;
+
+ class BRHZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv,
+ pattern>;
+
+ class BRHZRegInst<RegisterClass rclass>:
+ BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRHZVecInst<ValueType vectype>:
+ BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZeroHalfword {
+ def v8i16: BRHZVecInst<v8i16>;
+ def r16: BRHZRegInst<R16C>;
+ }
+
+ defm BRHZ: BranchZeroHalfword;
+}
+
+//===----------------------------------------------------------------------===//
+// setcc and brcond patterns:
+//===----------------------------------------------------------------------===//
+
+def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest),
+ (BRHZr16 R16C:$rA, bb:$dest)>;
+def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
+ (BRHNZr16 R16C:$rA, bb:$dest)>;
+
+def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
+ (BRZr32 R32C:$rA, bb:$dest)>;
+def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest),
+ (BRNZr32 R32C:$rA, bb:$dest)>;
+
+multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CEQHr16 R16C:$rA, R16C:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDeq : BranchCondEQ<seteq, BRHNZr16, BRNZr32>;
+defm BRCONDne : BranchCondEQ<setne, BRHZr16, BRZr32>;
+
+multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CLGTHr16 R16C:$rA, R16C:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDugt : BranchCondLGT<setugt, BRHNZr16, BRNZr32>;
+defm BRCONDule : BranchCondLGT<setule, BRHZr16, BRZr32>;
+
+multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
+ SPUInstr orinst32, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (orinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val),
+ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (orinst16 (CLGTHr16 R16C:$rA, R16C:$rB),
+ (CEQHr16 R16C:$rA, R16C:$rB)),
+ bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (orinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val),
+ (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (orinst32 (CLGTr32 R32C:$rA, R32C:$rB),
+ (CEQr32 R32C:$rA, R32C:$rB)),
+ bb:$dest)>;
+}
+
+defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZr16, ORr32, BRZr32>;
+
+multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CGTHr16 R16C:$rA, R16C:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDgt : BranchCondGT<setgt, BRHNZr16, BRNZr32>;
+defm BRCONDle : BranchCondGT<setle, BRHZr16, BRZr32>;
+
+multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
+ SPUInstr orinst32, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (orinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val),
+ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (orinst16 (CGTHr16 R16C:$rA, R16C:$rB),
+ (CEQHr16 R16C:$rA, R16C:$rB)),
+ bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (orinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val),
+ (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (orinst32 (CGTr32 R32C:$rA, R32C:$rB),
+ (CEQr32 R32C:$rA, R32C:$rB)),
+ bb:$dest)>;
+}
+
+defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZr16, ORr32, BRZr32>;
+
+let isTerminator = 1, isBarrier = 1 in {
+ let isReturn = 1 in {
+ def RET:
+ RETForm<"bi\t$$lr", [(retflag)]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Single precision floating point instructions
+//===----------------------------------------------------------------------===//
+
+class FAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
+ SPrecFP, pattern>;
+
+class FAVecInst<ValueType vectype>:
+ FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass SFPAdd
+{
+ def v4f32: FAVecInst<v4f32>;
+ def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FA : SFPAdd;
+
+class FSInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB",
+ SPrecFP, pattern>;
+
+class FSVecInst<ValueType vectype>:
+ FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass SFPSub
+{
+ def v4f32: FSVecInst<v4f32>;
+ def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FS : SFPSub;
+
+// Floating point reciprocal estimate
+
+class FRESTInst<dag OOL, dag IOL>:
+ RRForm_1<0b00110111000, OOL, IOL,
+ "frest\t$rT, $rA", SPrecFP,
+ [/* no pattern */]>;
+
+def FRESTv4f32 :
+ FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+def FRESTf32 :
+ FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>;
+
+// Floating point interpolate (used in conjunction with reciprocal estimate)
+def FIv4f32 :
+ RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fi\t$rT, $rA, $rB", SPrecFP,
+ [/* no pattern */]>;
+
+def FIf32 :
+ RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fi\t$rT, $rA, $rB", SPrecFP,
+ [/* no pattern */]>;
+
+//--------------------------------------------------------------------------
+// Basic single precision floating point comparisons:
+//
+// Note: There is no support on SPU for single precision NaN. Consequently,
+// ordered and unordered comparisons are the same.
+//--------------------------------------------------------------------------
+
+def FCEQf32 :
+ RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fceq\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setoeq R32FP:$rA, R32FP:$rB),
+ (FCEQf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCMEQf32 :
+ RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcmeq\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMEQf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCGTf32 :
+ RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcgt\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setugt R32FP:$rA, R32FP:$rB),
+ (FCGTf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCMGTf32 :
+ RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcmgt\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setugt (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMGTf32 R32FP:$rA, R32FP:$rB)>;
+
+//--------------------------------------------------------------------------
+// Single precision floating point comparisons and SETCC equivalents:
+//--------------------------------------------------------------------------
+
+def : SETCCNegCondReg<setune, R32FP, i32, XORIr32, FCEQf32>;
+def : SETCCNegCondReg<setone, R32FP, i32, XORIr32, FCEQf32>;
+
+def : SETCCBinOpReg<setuge, R32FP, ORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setoge, R32FP, ORr32, FCGTf32, FCEQf32>;
+
+def : SETCCBinOpReg<setult, R32FP, NORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setolt, R32FP, NORr32, FCGTf32, FCEQf32>;
+
+def : Pat<(setule R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
+def : Pat<(setole R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
+
+// FP Status and Control Register Write
+// Why isn't rT a don't care in the ISA?
+// Should we create a special RRForm_3 for this guy and zero out the rT?
+def FSCRWf32 :
+ RRForm_1<0b01011101110, (outs R32FP:$rT), (ins R32FP:$rA),
+ "fscrwr\t$rA", SPrecFP,
+ [/* This instruction requires an intrinsic. Note: rT is unused. */]>;
+
+// FP Status and Control Register Read
+def FSCRRf32 :
+ RRForm_2<0b01011101110, (outs R32FP:$rT), (ins),
+ "fscrrd\t$rT", SPrecFP,
+ [/* This instruction requires an intrinsic */]>;
+
+// llvm instruction space
+// How do these map onto cell instructions?
+// fdiv rA rB
+// frest rC rB # c = 1/b (estimate; the frest/fi pair together form it)
+// fi rC rB rC
+// fm rD rA rC # d = a * 1/b
+// fnms rB rD rB rA # b = -(d * b - a) -- should == 0 in a perfect world
+// fma rB rB rC rD # b = b * c + d
+// = -(d * b - a) * c + d
+// = a * c - c * (a * b * c - a) (with d = a * c)
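+//
+// In other words, this is one Newton-Raphson refinement of the frest
+// reciprocal estimate; the fdiv patterns built from these instructions
+// live in SPUMathInstr.td.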
+
+// fcopysign (???)
+
+// Library calls:
+// These llvm instructions will actually map to library calls.
+// All that's needed, then, is to check that the appropriate library is
+// imported and do a brsl to the proper function name.
+// frem # fmod(x, y): x - (x/y) * y
+// (Note: fmod(double, double), fmodf(float, float))
+// fsqrt?
+// fsin?
+// fcos?
+// Unimplemented SPU instruction space
+// floating reciprocal absolute square root estimate (frsqest)
+
+// The following are probably just intrinsics
+// status and control register write
+// status and control register read
+
+//--------------------------------------
+// Floating point multiply instructions
+//--------------------------------------
+
+def FMv4f32:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fm\t$rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (fmul (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def FMf32 :
+ RRForm<0b01100011010, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fm\t$rT, $rA, $rB", SPrecFP,
+ [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
+
+// Floating point multiply and add
+// e.g. d = c + (a * b)
+def FMAv4f32:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fadd (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
+
+def FMAf32:
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+// FP multiply and subtract
+// Subtracts value in rC from product
+// res = a * b - c
+def FMSv4f32 :
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (v4f32 VECREG:$rC)))]>;
+
+def FMSf32 :
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT,
+ (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
+
+// Floating Negative Multiply and Subtract
+// Subtracts product from value in rC
+// res = fneg(fms a b c)
+// = - (a * b - c)
+// = c - a * b
+// NOTE: subtraction order
+// fsub a b = a - b
+// fs a b = b - a?
+def FNMSf32 :
+ RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+def FNMSv4f32 :
+ RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB))))]>;
+
+//--------------------------------------
+// Floating Point Conversions
+// Signed conversions:
+def CSiFv4f32:
+ CVTIntFPForm<0b0101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "csflt\t$rT, $rA, 0", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>;
+
+// Convert signed integer to floating point
+def CSiFf32 :
+ CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA),
+ "csflt\t$rT, $rA, 0", SPrecFP,
+ [(set R32FP:$rT, (sint_to_fp R32C:$rA))]>;
+
+// Convert unsigned int to float
+def CUiFv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cuflt\t$rT, $rA, 0", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (uint_to_fp (v4i32 VECREG:$rA)))]>;
+
+def CUiFf32 :
+ CVTIntFPForm<0b1101101110, (outs R32FP:$rT), (ins R32C:$rA),
+ "cuflt\t$rT, $rA, 0", SPrecFP,
+ [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>;
+
+// Convert float to unsigned int
+// Assume that scale = 0
+
+def CFUiv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cfltu\t$rT, $rA, 0", SPrecFP,
+ [(set (v4i32 VECREG:$rT), (fp_to_uint (v4f32 VECREG:$rA)))]>;
+
+def CFUif32 :
+ CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+ "cfltu\t$rT, $rA, 0", SPrecFP,
+ [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>;
+
+// Convert float to signed int
+// Assume that scale = 0
+
+def CFSiv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cflts\t$rT, $rA, 0", SPrecFP,
+ [(set (v4i32 VECREG:$rT), (fp_to_sint (v4f32 VECREG:$rA)))]>;
+
+def CFSif32 :
+ CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+ "cflts\t$rT, $rA, 0", SPrecFP,
+ [(set R32C:$rT, (fp_to_sint R32FP:$rA))]>;
+
+//===----------------------------------------------------------------------==//
+// Single<->Double precision conversions
+//===----------------------------------------------------------------------==//
+
+// NOTE: We use "vec" name suffix here to avoid confusion (e.g. input is a
+// v4f32, output is v2f64--which goes in the name?)
+
+// Floating point extend single to double
+// NOTE: Not sure if passing in v4f32 to FESDvec is correct since it
+// operates on two double-word slots (i.e. 1st and 3rd fp numbers
+// are ignored).
+def FESDvec :
+ RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "fesd\t$rT, $rA", SPrecFP,
+ [(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))]>;
+
+def FESDf32 :
+ RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA),
+ "fesd\t$rT, $rA", SPrecFP,
+ [(set R64FP:$rT, (fextend R32FP:$rA))]>;
+
+// Floating point round double to single
+//def FRDSvec :
+// RRForm_1<0b10011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+// "frds\t$rT, $rA,", SPrecFP,
+// [(set (v4f32 R32FP:$rT), (fround (v2f64 R64FP:$rA)))]>;
+
+def FRDSf64 :
+ RRForm_1<0b10011101110, (outs R32FP:$rT), (ins R64FP:$rA),
+ "frds\t$rT, $rA", SPrecFP,
+ [(set R32FP:$rT, (fround R64FP:$rA))]>;
+
+// TODO: include anyextend?
+
+//===----------------------------------------------------------------------==//
+// Double precision floating point instructions
+//===----------------------------------------------------------------------==//
+def FAf64 :
+ RRForm<0b00110011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfa\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fadd R64FP:$rA, R64FP:$rB))]>;
+
+def FAv2f64 :
+ RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfa\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (fadd (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FSf64 :
+ RRForm<0b10100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfs\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fsub R64FP:$rA, R64FP:$rB))]>;
+
+def FSv2f64 :
+ RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfs\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FMf64 :
+ RRForm<0b01100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfm\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fmul R64FP:$rA, R64FP:$rB))]>;
+
+def FMv2f64:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfm\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FMAf64:
+ RRForm<0b00111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfma\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMAv2f64:
+ RRForm<0b00111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfma\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fadd (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMSf64 :
+ RRForm<0b10111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfms\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMSv2f64 :
+ RRForm<0b10111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfms\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
+ (v2f64 VECREG:$rC)))]>;
+
+// DFNMS: - (a * b - c)
+// - (a * b) + c => c - (a * b)
+
+class DFNMSInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB",
+ DPrecFP, pattern>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+class DFNMSVecInst<list<dag> pattern>:
+ DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ pattern>;
+
+class DFNMSRegInst<list<dag> pattern>:
+ DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ pattern>;
+
+multiclass DFMultiplySubtract
+{
+ def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB))))]>;
+
+ def f64 : DFNMSRegInst<[(set R64FP:$rT,
+ (fsub R64FP:$rC,
+ (fmul R64FP:$rA, R64FP:$rB)))]>;
+}
+
+defm DFNMS : DFMultiplySubtract;
+
+// - (a * b + c)
+// - (a * b) - c
+def FNMAf64 :
+ RRForm<0b11111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfnma\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fneg (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FNMAv2f64 :
+ RRForm<0b11111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfnma\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fneg (fadd (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+//===----------------------------------------------------------------------==//
+// Floating point negation and absolute value
+//===----------------------------------------------------------------------==//
+
+def : Pat<(fneg (v4f32 VECREG:$rA)),
+ (XORfnegvec (v4f32 VECREG:$rA),
+ (v4f32 (ILHUv4i32 0x8000)))>;
+
+def : Pat<(fneg R32FP:$rA),
+ (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
+
+// Floating point absolute value
+// Note: f64 fabs is custom-selected.
+
+def : Pat<(fabs R32FP:$rA),
+ (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>;
+
+def : Pat<(fabs (v4f32 VECREG:$rA)),
+ (ANDfabsvec (v4f32 VECREG:$rA),
+ (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>;
+
+//===----------------------------------------------------------------------===//
+// Hint for branch instructions:
+//===----------------------------------------------------------------------===//
+
+/* def HBR : SPUInstr<(outs), (ins), "hbr\t" */
+
+//===----------------------------------------------------------------------===//
+// Execution, Load NOP (execute NOPs belong in the even pipeline, load NOPs
+// belong in the odd pipeline)
+//===----------------------------------------------------------------------===//
+
+def ENOP : SPUInstr<(outs), (ins), "enop", ExecNOP> {
+ let Pattern = [];
+
+ let Inst{0-10} = 0b10000000010;
+ let Inst{11-17} = 0;
+ let Inst{18-24} = 0;
+ let Inst{25-31} = 0;
+}
+
+def LNOP : SPUInstr<(outs), (ins), "lnop", LoadNOP> {
+ let Pattern = [];
+
+ let Inst{0-10} = 0b10000000000;
+ let Inst{11-17} = 0;
+ let Inst{18-24} = 0;
+ let Inst{25-31} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Bit conversions (type conversions between vector/packed types)
+// NOTE: Promotions are handled using the XS* instructions.
+//===----------------------------------------------------------------------===//
+def : Pat<(v16i8 (bitconvert (v8i16 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 VECREG:$src))), (v16i8 VECREG:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 VECREG:$src))), (v8i16 VECREG:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 VECREG:$src))), (v4i32 VECREG:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 VECREG:$src))), (v2i64 VECREG:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 VECREG:$src))), (v4f32 VECREG:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v2f64 VECREG:$src))), (v2f64 VECREG:$src)>;
+
+def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
+ (ORi128_vec VECREG:$src)>;
+def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))),
+ (ORi128_vec VECREG:$src)>;
+def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))),
+ (ORi128_vec VECREG:$src)>;
+def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))),
+ (ORi128_vec VECREG:$src)>;
+def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))),
+ (ORi128_vec VECREG:$src)>;
+def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))),
+ (ORi128_vec VECREG:$src)>;
+
+def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))),
+ (v16i8 (ORvec_i128 GPRC:$src))>;
+def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))),
+ (v8i16 (ORvec_i128 GPRC:$src))>;
+def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))),
+ (v4i32 (ORvec_i128 GPRC:$src))>;
+def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))),
+ (v2i64 (ORvec_i128 GPRC:$src))>;
+def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))),
+ (v4f32 (ORvec_i128 GPRC:$src))>;
+def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))),
+ (v2f64 (ORvec_i128 GPRC:$src))>;
+
+//===----------------------------------------------------------------------===//
+// Instruction patterns:
+//===----------------------------------------------------------------------===//
+
+// General 32-bit constants:
+def : Pat<(i32 imm:$imm),
+ (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm))>;
+
+// Single precision float constants:
+def : Pat<(f32 fpimm:$imm),
+ (IOHLf32 (ILHUf32 (HI16_f32 fpimm:$imm)), (LO16_f32 fpimm:$imm))>;
+
+// General constant 32-bit vectors
+def : Pat<(v4i32 v4i32Imm:$imm),
+ (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))),
+ (LO16_vec v4i32Imm:$imm))>;
+
+// 8-bit constants
+def : Pat<(i8 imm:$imm),
+ (ILHr8 imm:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Call instruction patterns:
+//===----------------------------------------------------------------------===//
+// Return void
+def : Pat<(ret),
+ (RET)>;
+
+//===----------------------------------------------------------------------===//
+// Zero/Any/Sign extensions
+//===----------------------------------------------------------------------===//
+
+// sext 8->32: Sign extend bytes to words
+def : Pat<(sext_inreg R32C:$rSrc, i8),
+ (XSHWr32 (XSBHr32 R32C:$rSrc))>;
+
+def : Pat<(i32 (sext R8C:$rSrc)),
+ (XSHWr16 (XSBHr8 R8C:$rSrc))>;
+
+// sext 8->64: Sign extend bytes to double word
+def : Pat<(sext_inreg R64C:$rSrc, i8),
+ (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>;
+
+def : Pat<(i64 (sext R8C:$rSrc)),
+ (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>;
+
+// zext 8->16: Zero extend bytes to halfwords
+def : Pat<(i16 (zext R8C:$rSrc)),
+ (ANDHIi8i16 R8C:$rSrc, 0xff)>;
+
+// zext 8->32: Zero extend bytes to words
+def : Pat<(i32 (zext R8C:$rSrc)),
+ (ANDIi8i32 R8C:$rSrc, 0xff)>;
+
+// zext 8->64: Zero extend bytes to double words
+def : Pat<(i64 (zext R8C:$rSrc)),
+ (ORi64_v2i64 (SELBv4i32 (ROTQMBYv4i32
+ (ORv4i32_i32 (ANDIi8i32 R8C:$rSrc, 0xff)),
+ 0x4),
+ (ILv4i32 0x0),
+ (FSMBIv4i32 0x0f0f)))>;
+
+// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
+def : Pat<(i16 (anyext R8C:$rSrc)),
+ (ORHIi8i16 R8C:$rSrc, 0)>;
+
+// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
+def : Pat<(i32 (anyext R8C:$rSrc)),
+ (ORIi8i32 R8C:$rSrc, 0)>;
+
+// sext 16->64: Sign extend halfword to double word
+def : Pat<(sext_inreg R64C:$rSrc, i16),
+ (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>;
+
+def : Pat<(sext R16C:$rSrc),
+ (XSWDr64 (XSHWr16 R16C:$rSrc))>;
+
+// zext 16->32: Zero extend halfwords to words
+def : Pat<(i32 (zext R16C:$rSrc)),
+ (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xf))),
+ (ANDIi16i32 R16C:$rSrc, 0xf)>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xff))),
+ (ANDIi16i32 R16C:$rSrc, 0xff)>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))),
+ (ANDIi16i32 R16C:$rSrc, 0xfff)>;
+
+// anyext 16->32: Extend 16->32 bits, irrespective of sign
+def : Pat<(i32 (anyext R16C:$rSrc)),
+ (ORIi16i32 R16C:$rSrc, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Truncates:
+// These truncates are for the SPU's supported types (i8, i16, i32). i64 and
+// above are custom lowered.
+//===----------------------------------------------------------------------===//
+
+def : Pat<(i8 (trunc GPRC:$src)),
+ (ORi8_v16i8
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)))>;
+
+def : Pat<(i8 (trunc R64C:$src)),
+ (ORi8_v16i8
+ (SHUFBv2i64_m32
+ (ORv2i64_i64 R64C:$src),
+ (ORv2i64_i64 R64C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)))>;
+
+def : Pat<(i8 (trunc R32C:$src)),
+ (ORi8_v16i8
+ (SHUFBv4i32_m32
+ (ORv4i32_i32 R32C:$src),
+ (ORv4i32_i32 R32C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+
+def : Pat<(i8 (trunc R16C:$src)),
+ (ORi8_v16i8
+ (SHUFBv4i32_m32
+ (ORv8i16_i16 R16C:$src),
+ (ORv8i16_i16 R16C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>;
+
+def : Pat<(i16 (trunc GPRC:$src)),
+ (ORi16_v8i16
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)))>;
+
+def : Pat<(i16 (trunc R64C:$src)),
+ (ORi16_v8i16
+ (SHUFBv2i64_m32
+ (ORv2i64_i64 R64C:$src),
+ (ORv2i64_i64 R64C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)))>;
+
+def : Pat<(i16 (trunc R32C:$src)),
+ (ORi16_v8i16
+ (SHUFBv4i32_m32
+ (ORv4i32_i32 R32C:$src),
+ (ORv4i32_i32 R32C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)))>;
+
+def : Pat<(i32 (trunc GPRC:$src)),
+ (ORi32_v4i32
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)))>;
+
+def : Pat<(i32 (trunc R64C:$src)),
+ (ORi32_v4i32
+ (SHUFBv2i64_m32
+ (ORv2i64_i64 R64C:$src),
+ (ORv2i64_i64 R64C:$src),
+ (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)))>;
+
+//===----------------------------------------------------------------------===//
+// Address generation: SPU, like PPC, has to split addresses into high and
+// low parts in order to load them into a register.
+//===----------------------------------------------------------------------===//
+
+def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
+def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>;
+def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>;
+def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0),
+ (SPUlo tglobaladdr:$in, 0)),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+
+def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0),
+ (SPUlo texternalsym:$in, 0)),
+ (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0),
+ (SPUlo tjumptable:$in, 0)),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0),
+ (SPUlo tconstpool:$in, 0)),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
+
+def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+
+def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)),
+ (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
+
+def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+
+def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
+
+// Intrinsics:
+include "CellSDKIntrinsics.td"
+// Various math operator instruction sequences
+include "SPUMathInstr.td"
+// 64-bit "instructions"/support
+include "SPU64InstrInfo.td"
+// 128-bit "instructions"/support
+include "SPU128InstrInfo.td"
diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h
new file mode 100644
index 000000000000..6a66967bc050
--- /dev/null
+++ b/lib/Target/CellSPU/SPUMachineFunction.h
@@ -0,0 +1,43 @@
+//===-- SPUMachineFunction.h - Private data used for CellSPU ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_MACHINE_FUNCTION_INFO_H
+#define SPU_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// SPUFunctionInfo - Cell SPU target-specific information for each
+/// MachineFunction
+class SPUFunctionInfo : public MachineFunctionInfo {
+private:
+ /// UsesLR - Indicates whether LR is used in the current function.
+ ///
+ bool UsesLR;
+
+public:
+ SPUFunctionInfo(MachineFunction& MF)
+ : UsesLR(false)
+ {}
+
+ void setUsesLR(bool U) { UsesLR = U; }
+ bool usesLR() const { return UsesLR; }
+
+};
+
+} // end of namespace llvm
+
+
+#endif
+
diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td
new file mode 100644
index 000000000000..80ebde3ef259
--- /dev/null
+++ b/lib/Target/CellSPU/SPUMathInstr.td
@@ -0,0 +1,97 @@
+//======--- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*--======//
+//
+// Cell SPU math operations
+//
+// This target description file contains instruction sequences for various
+// math operations, such as vector multiplies, i32 multiply, etc., for the
+// SPU's i32, i16, and i8 types and their corresponding vector types.
+//
+// Any resemblance to libsimdmath or the Cell SDK simdmath library is
+// purely and completely coincidental.
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v16i8 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
+ (ORv4i32
+ (ANDv4i32
+ (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
+ (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)),
+ (ILAv4i32 0x0000ffff)),
+ (SHLIv4i32
+ (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
+ (ROTMAIv4i32_i32 VECREG:$rB, 16)),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
+ (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)), 16))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v8i16 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
+ (FSMBIv8i16 0xcccc))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v4i32, i32 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def MPYv4i32:
+ Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (Av4i32
+ (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
+ (MPYHv4i32 VECREG:$rB, VECREG:$rA)),
+ (MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
+
+def MPYi32:
+ Pat<(mul R32C:$rA, R32C:$rB),
+ (Ar32
+ (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
+ (MPYHr32 R32C:$rB, R32C:$rA)),
+ (MPYUr32 R32C:$rA, R32C:$rB))>;
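+
+// A sketch of why this composition works (assuming the usual SPU semantics,
+// where the mpy* forms are 16 x 16 -> 32 multiplies): writing a = ah*2^16 + al
+// and b = bh*2^16 + bl, the low 32 bits of a*b are
+// (ah*bl << 16) + (bh*al << 16) + al*bl, i.e. MPYH(a,b) + MPYH(b,a) + MPYU(a,b).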
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f32, v4f32 divide instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Reciprocal estimate and interpolation
+def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
+// Division estimate
+def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
+ Interpf32.Fragment,
+ DivEstf32.Fragment)>;
+// Epsilon addition
+def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
+
+def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
+ (SELBf32_cond NRaphf32.Fragment,
+ Epsilonf32.Fragment,
+ (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
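+
+// A note on the final select above (a reading of this sequence, not taken
+// from the SDK): AIf32 adds 1 to the raw bits of the Newton-Raphson quotient
+// (one ulp), and the FNMSf32/CGTIf32 pair tests the sign of
+// rA - rB * (quotient + 1 ulp) to decide which candidate quotient is kept.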
+
+// Reciprocal estimate and interpolation
+def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
+// Division estimate
+def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)),
+ Interpv4f32.Fragment,
+ DivEstv4f32.Fragment)>;
+// Epsilon addition
+def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
+
+def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (SELBv4f32_cond NRaphv4f32.Fragment,
+ Epsilonv4f32.Fragment,
+ (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
+ Epsilonv4f32.Fragment,
+ (v4f32 VECREG:$rA)), -1))>;
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
new file mode 100644
index 000000000000..87c4115d1b18
--- /dev/null
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -0,0 +1,156 @@
+//===- SPUNodes.td - Specialized SelectionDAG nodes used for CellSPU ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Type profiles and SelectionDAG nodes used by CellSPU
+//
+//===----------------------------------------------------------------------===//
+
+// Type profile for a call sequence
+def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
+
+// SPU_GenControl: Type profile for generating control words for insertions
+def SPU_GenControl : SDTypeProfile<1, 1, []>;
+def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+//===----------------------------------------------------------------------===//
+// Operand constraints:
+//===----------------------------------------------------------------------===//
+
+def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+// Operand type constraints for vector shuffle/permute operations
+def SDT_SPUshuffle : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
+
+// Vector binary operator type constraints (needs a further constraint to
+// ensure that operand 0 is a vector...):
+
+def SPUVecBinop: SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
+
+// Trinary operators, e.g., addx, carry generate
+def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>
+]>;
+
+// SELECT_MASK type constraints: There are several variations for the various
+// vector types (this avoids having to bit_convert all over the place.)
+def SPUselmask_type: SDTypeProfile<1, 1, [
+ SDTCisInt<1>
+]>;
+
+// SELB type constraints:
+def SPUselb_type: SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<0, 3> ]>;
+
+// SPU Vector shift pseudo-instruction type constraints
+def SPUvecshift_type: SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
+
+// "marker" type for i64 operators that need a shuffle mask
+// (i.e., uses cg or bg or another instruction that needs to
+// use shufb to get things in the right place.)
+// Op0: The result
+// Op1, 2: LHS, RHS
+// Op3: Carry-generate shuffle mask
+
+def SPUmarker_type : SDTypeProfile<1, 3, [
+ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>;
+
+//===----------------------------------------------------------------------===//
+// Synthetic/pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+// SPU CNTB:
+def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
+
+// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see
+// SPUISelLowering.h):
+def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
+
+// Shift left quadword by bits and bytes
+def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>;
+def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>;
+
+// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
+def SPUvec_shl: SDNode<"SPUISD::VEC_SHL", SPUvecshift_type, []>;
+def SPUvec_srl: SDNode<"SPUISD::VEC_SRL", SPUvecshift_type, []>;
+def SPUvec_sra: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type, []>;
+
+def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
+def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
+
+// Vector rotate left; bits shifted out of the left are rotated in on the right
+def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
+ SPUvecshift_type, []>;
+
+// Vector rotate left by bytes, but the count is given in bits and the SPU
+// internally converts it to bytes (saves an instruction to mask off lower
+// three bits)
+def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
+ SPUvecshift_type>;
+
+// SPU form select mask for bytes, immediate
+def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
+
+// SPU select bits instruction
+def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
+
+def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
+def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
+
+def SPU_vec_demote : SDTypeProfile<1, 1, []>;
+def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
+
+// Address high and low components, used for [r+r] type addressing
+def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
+def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>;
+
+// PC-relative address
+def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>;
+
+// A-Form local store addresses
+def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
+
+// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
+def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
+
+// i64 markers: supplies extra operands used to generate the i64 operator
+// instruction sequences
+def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>;
+def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>;
+def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>;
+
+//===----------------------------------------------------------------------===//
+// Constraints: (taken from PPCInstrInfo.td)
+//===----------------------------------------------------------------------===//
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+//===----------------------------------------------------------------------===//
+// Return ("flag" isn't quite the right word here: the operations are flagged
+// so that instruction scheduling doesn't disassociate them.)
+//===----------------------------------------------------------------------===//
+
+def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
new file mode 100644
index 000000000000..802628f89965
--- /dev/null
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -0,0 +1,655 @@
+//===- SPUOperands.td - Cell SPU Instruction Operands ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instruction Operands:
+//===----------------------------------------------------------------------===//
+
+def LO16 : SDNodeXForm<imm, [{
+ unsigned val = N->getZExtValue();
+ // Transformation function: get the low 16 bits.
+ return getI32Imm(val & 0xffff);
+}]>;
+
+def LO16_vec : SDNodeXForm<scalar_to_vector, [{
+ SDValue OpVal(0, 0);
+
+ // Transformation function: get the low 16 bit immediate from a build_vector
+ // node.
+ assert(N->getOpcode() == ISD::BUILD_VECTOR
+ && "LO16_vec got something other than a BUILD_VECTOR");
+
+ // Get first constant operand...
+ for (unsigned i = 0, e = N->getNumOperands();
+ OpVal.getNode() == 0 && i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ }
+
+ assert(OpVal.getNode() != 0 && "LO16_vec did not locate a <defined> node");
+ ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+ return getI32Imm((unsigned)CN->getZExtValue() & 0xffff);
+}]>;
+
+// Transform an immediate, returning the high 16 bits shifted down:
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
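+
+// (Worked example, for illustration only: the 32-bit constant 0x12345678
+// splits into HI16 = 0x1234 and LO16 = 0x5678; the two transforms are meant
+// to be combined when materializing a full 32-bit immediate, e.g. via an
+// upper-halfword load followed by an or-into-lower-halfword sequence.)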
+
+// Transformation function: shift the high 16 bit immediate from a build_vector
+// node into the low 16 bits, and return a 16-bit constant.
+def HI16_vec : SDNodeXForm<scalar_to_vector, [{
+ SDValue OpVal(0, 0);
+
+ assert(N->getOpcode() == ISD::BUILD_VECTOR
+ && "HI16_vec got something other than a BUILD_VECTOR");
+
+ // Get first constant operand...
+ for (unsigned i = 0, e = N->getNumOperands();
+ OpVal.getNode() == 0 && i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ }
+
+ assert(OpVal.getNode() != 0 && "HI16_vec did not locate a <defined> node");
+ ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+ return getI32Imm((unsigned)CN->getZExtValue() >> 16);
+}]>;
+
+// simm7 predicate - True if the immediate fits in a 7-bit signed
+// field.
+def simm7: PatLeaf<(imm), [{
+ int sextVal = int(N->getSExtValue());
+ return (sextVal >= -64 && sextVal <= 63);
+}]>;
+
+// uimm7 predicate - True if the immediate fits in a 7-bit unsigned
+// field.
+def uimm7: PatLeaf<(imm), [{
+ return (N->getZExtValue() <= 0x7f);
+}]>;
+
+// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended
+// field.
+def immSExt8 : PatLeaf<(imm), [{
+ int Value = int(N->getSExtValue());
+ return (Value >= -(1 << 8) && Value <= (1 << 8) - 1);
+}]>;
+
+// immU8: immediate, unsigned 8-bit quantity
+def immU8 : PatLeaf<(imm), [{
+ return (N->getZExtValue() <= 0xff);
+}]>;
+
+// i64ImmSExt10 predicate - True if the i64 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'ldq'.
+def i64ImmSExt10 : PatLeaf<(imm), [{
+ return isI64IntS10Immediate(N);
+}]>;
+
+// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'ldq'.
+def i32ImmSExt10 : PatLeaf<(imm), [{
+ return isI32IntS10Immediate(N);
+}]>;
+
+// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned
+// field. Used by RI10Form instructions like 'ldq'.
+def i32ImmUns10 : PatLeaf<(imm), [{
+ return isI32IntU10Immediate(N);
+}]>;
+
+// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'ldq'.
+def i16ImmSExt10 : PatLeaf<(imm), [{
+ return isI16IntS10Immediate(N);
+}]>;
+
+// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned
+// value. Used by RI10Form instructions.
+def i16ImmUns10 : PatLeaf<(imm), [{
+ return isI16IntU10Immediate(N);
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ short Ignored;
+ return isIntS16Immediate(N, Ignored);
+}]>;
+
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+def immU16 : PatLeaf<(imm), [{
+ // immU16 predicate - True if the immediate fits into a 16-bit unsigned field.
+ return (uint64_t)N->getZExtValue() == (N->getZExtValue() & 0xffff);
+}]>;
+
+def imm18 : PatLeaf<(imm), [{
+ // imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
+ int Value = (int) N->getZExtValue();
+ return ((Value & ((1 << 19) - 1)) == Value);
+}]>;
+
+def lo16 : PatLeaf<(imm), [{
+ // lo16 predicate - returns true if the immediate fits entirely within the
+ // low order 16 bits (the upper bits are all zero) and is a 32-bit constant:
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = N->getZExtValue();
+ return ((val & 0x0000ffff) == val);
+ }
+
+ return false;
+}], LO16>;
+
+def hi16 : PatLeaf<(imm), [{
+ // hi16 predicate - returns true if the immediate has all zeros in the
+ // low order 16 bits (only the upper 16 bits may be set):
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = uint32_t(N->getZExtValue());
+ return ((val & 0xffff0000) == val);
+ } else if (N->getValueType(0) == MVT::i64) {
+ uint64_t val = N->getZExtValue();
+ return ((val & 0xffff0000ULL) == val);
+ }
+
+ return false;
+}], HI16>;
+
+def bitshift : PatLeaf<(imm), [{
+ // bitshift predicate - returns true if 0 < imm <= 7 for SHLQBII
+ // (shift left quadword by bits immediate)
+ int64_t Val = N->getZExtValue();
+ return (Val > 0 && Val <= 7);
+}]>;
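+
+// (Illustration, assuming the usual decomposition: quadword shift counts
+// larger than 7 are split into a byte shift plus a residual bit shift; a
+// 35-bit shift, for example, becomes a 4-byte shift followed by a 3-bit
+// shift, and it is that residual bit count which the bitshift predicate
+// above accepts.)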
+
+//===----------------------------------------------------------------------===//
+// Floating point operands:
+//===----------------------------------------------------------------------===//
+
+// Transform a float, returning the high 16 bits shifted down, as if
+// the float was really an unsigned integer:
+def HI16_f32 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) >> 16);
+}]>;
+
+// Transformation function on floats: get the low 16 bits as if the float was
+// an unsigned integer.
+def LO16_f32 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) & 0xffff);
+}]>;
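+
+// (Worked example: 1.0f has the IEEE-754 bit pattern 0x3f800000, so HI16_f32
+// yields 0x3f80 and LO16_f32 yields 0x0000.)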
+
+def FPimm_sext16 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm((int) ((FloatToBits(fval) << 16) >> 16));
+}]>;
+
+def FPimm_u18 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) & ((1 << 19) - 1));
+}]>;
+
+def fpimmSExt16 : PatLeaf<(fpimm), [{
+ short Ignored;
+ return isFPS16Immediate(N, Ignored);
+}], FPimm_sext16>;
+
+// Does the SFP constant only have the upper 16 bits set?
+def hi16_f32 : PatLeaf<(fpimm), [{
+ if (N->getValueType(0) == MVT::f32) {
+ uint32_t val = FloatToBits(N->getValueAPF().convertToFloat());
+ return ((val & 0xffff0000) == val);
+ }
+
+ return false;
+}], HI16_f32>;
+
+// Does the SFP constant fit into 18 bits?
+def fpimm18 : PatLeaf<(fpimm), [{
+ if (N->getValueType(0) == MVT::f32) {
+ uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
+ return ((Value & ((1 << 19) - 1)) == Value);
+ }
+
+ return false;
+}], FPimm_u18>;
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands (TODO):
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// build_vector operands:
+//===----------------------------------------------------------------------===//
+
+// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8SExt8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load, works in conjunction with its transform function. N.B.: This relies on
+// the incoming constant being a 16-bit quantity, where the upper and lower bytes
+// are EXACTLY the same (e.g., 0x2a2a)
+def v16i8SExt8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8SExt8Imm_xform>;
+
+// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8U8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant
+// load, works in conjunction with its transform function. N.B.: This relies on
+// the incoming constant being a 16-bit quantity, where the upper and lower bytes
+// are EXACTLY the same (e.g., 0x2a2a)
+def v16i8U8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8U8Imm_xform>;
+
+// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt8Imm_xform>;
+
+// v8i16SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt10Imm_xform>;
+
+// v8i16Uns10Imm_xform function: convert build_vector to 10-bit unsigned
+// immediate constant load for v8i16 vectors.
+def v8i16Uns10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16Uns10Imm: Predicate test for 10-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
+def v8i16Uns10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16Uns10Imm_xform>;
+
+// v8i16Uns16Imm_xform function: convert build_vector to 16-bit
+// immediate constant load for v8i16 vectors.
+def v8i16Uns16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16Uns16Imm_xform>;
+
+// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt10Imm_xform>;
+
+// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
+def v4i32Uns10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns10Imm_xform>;
+
+// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt16Imm_xform>;
+
+// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns18Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v4i32Uns18Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns18Imm_xform>;
+
+// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant
+// load.
+def ILHUvec_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32);
+}]>;
+
+/// immILHUvec: Predicate test for an ILHU constant vector.
+def immILHUvec: PatLeaf<(build_vector), [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i32 vector constants
+def v4i32_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_v4i32_imm(N, *CurDAG);
+}]>;
+
+def v4i32Imm: PatLeaf<(build_vector), [{
+ return SPU::get_v4i32_imm(N, *CurDAG).getNode() != 0;
+}], v4i32_get_imm>;
+
+// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt10Imm_xform>;
+
+// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt16Imm_xform>;
+
+// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v2i64 vectors.
+def v2i64Uns18Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v2i64Uns18Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64Uns18Imm_xform>;
+
+/// immILHUvec_i64: Predicate test for an ILHU constant v2i64 vector.
+def immILHUvec_i64: PatLeaf<(build_vector), [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i64 vector constants
+def v2i64_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_v2i64_imm(N, *CurDAG);
+}]>;
+
+def v2i64Imm: PatLeaf<(build_vector), [{
+ return SPU::get_v2i64_imm(N, *CurDAG).getNode() != 0;
+}], v2i64_get_imm>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+
+def s7imm: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def s7imm_i8: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def u7imm: Operand<i16> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i8: Operand<i8> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i32: Operand<i32> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+// Halfword, signed 10-bit constant
+def s10imm : Operand<i16> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i8: Operand<i8> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i32: Operand<i32> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i64: Operand<i64> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+// Unsigned 10-bit integers:
+def u10imm: Operand<i16> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i8: Operand<i8> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i32: Operand<i32> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def s16imm : Operand<i16> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i8: Operand<i8> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i32: Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i64: Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f32: Operand<f32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f64: Operand<f64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def u16imm_i64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm_i32 : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm : Operand<i16> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def f16imm : Operand<f32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def s18imm : Operand<i32> {
+ let PrintMethod = "printS18ImmOperand";
+}
+
+def u18imm : Operand<i32> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def u18imm_i64 : Operand<i64> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm : Operand<f32> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm_f64 : Operand<f64> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+// Negated 7-bit halfword rotate immediate operands
+def rothNeg7imm : Operand<i32> {
+ let PrintMethod = "printROTHNeg7Imm";
+}
+
+def rothNeg7imm_i16 : Operand<i16> {
+ let PrintMethod = "printROTHNeg7Imm";
+}
+
+// Negated 7-bit word rotate immediate operands
+def rotNeg7imm : Operand<i32> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i16 : Operand<i16> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i8 : Operand<i8> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def target : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+}
+
+// Absolute address call target
+def calltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+ let MIOperandInfo = (ops u18imm:$calldest);
+}
+
+// PC relative call target
+def relcalltarget : Operand<iPTR> {
+ let PrintMethod = "printPCRelativeOperand";
+ let MIOperandInfo = (ops s16imm:$calldest);
+}
+
+// Branch targets:
+def brtarget : Operand<OtherVT> {
+ let PrintMethod = "printPCRelativeOperand";
+}
+
+// Hint for branch target
+def hbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printHBROperand";
+}
+
+// Indirect call target
+def indcalltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+ let MIOperandInfo = (ops ptr_rc:$calldest);
+}
+
+def symbolHi: Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+
+def symbolLo: Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+
+def symbolLSA: Operand<i32> {
+ let PrintMethod = "printSymbolLSA";
+}
+
+// Shuffle address memory operand [s7imm(reg) d-format]
+def shufaddr : Operand<iPTR> {
+ let PrintMethod = "printShufAddr";
+ let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg);
+}
+
+// memory s10imm(reg) operand
+def dformaddr : Operand<iPTR> {
+ let PrintMethod = "printDFormAddr";
+ let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg);
+}
+
+// 256K local store address
+// N.B.: The tblgen code generator expects this operand to have two components,
+// an offset and a pointer. Of these, only the immediate offset is actually used.
+def addr256k : Operand<iPTR> {
+ let PrintMethod = "printAddr256K";
+ let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg);
+}
+
+// memory s18imm(reg) operand
+def memri18 : Operand<iPTR> {
+ let PrintMethod = "printMemRegImmS18";
+ let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg);
+}
+
+// memory register + register operand
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b);
+}
+
+// Define SPU-specific addressing modes: These come in three basic
+// flavors:
+//
+// D-form : [r+I10] (10-bit signed offset + reg)
+// X-form : [r+r] (reg+reg)
+// A-form : abs (256K LSA offset)
+// D-form(2): [r+I7] (7-bit signed offset + reg)
+
+def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr", [], []>;
+def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr", [], []>;
+def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr", [], []>;
+def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr", [], []>;
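+
+// (Illustrative use only, not a definition in this file: an instruction
+// selection pattern would typically pick up one of these addressing modes
+// along the lines of
+//
+//   def : Pat<(load dform_addr:$src), (SomeLoadInst dform_addr:$src)>;
+//
+// where SomeLoadInst stands in for whichever load instruction the pattern
+// targets.)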
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
new file mode 100644
index 000000000000..e031048e7ccb
--- /dev/null
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -0,0 +1,614 @@
+//===- SPURegisterInfo.cpp - Cell SPU Register Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "SPU.h"
+#include "SPURegisterInfo.h"
+#include "SPURegisterNames.h"
+#include "SPUInstrBuilder.h"
+#include "SPUSubtarget.h"
+#include "SPUMachineFunction.h"
+#include "SPUFrameInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// SPU::R14, return the number that it corresponds to (e.g. 14).
+unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace SPU;
+ switch (RegEnum) {
+ case SPU::R0: return 0;
+ case SPU::R1: return 1;
+ case SPU::R2: return 2;
+ case SPU::R3: return 3;
+ case SPU::R4: return 4;
+ case SPU::R5: return 5;
+ case SPU::R6: return 6;
+ case SPU::R7: return 7;
+ case SPU::R8: return 8;
+ case SPU::R9: return 9;
+ case SPU::R10: return 10;
+ case SPU::R11: return 11;
+ case SPU::R12: return 12;
+ case SPU::R13: return 13;
+ case SPU::R14: return 14;
+ case SPU::R15: return 15;
+ case SPU::R16: return 16;
+ case SPU::R17: return 17;
+ case SPU::R18: return 18;
+ case SPU::R19: return 19;
+ case SPU::R20: return 20;
+ case SPU::R21: return 21;
+ case SPU::R22: return 22;
+ case SPU::R23: return 23;
+ case SPU::R24: return 24;
+ case SPU::R25: return 25;
+ case SPU::R26: return 26;
+ case SPU::R27: return 27;
+ case SPU::R28: return 28;
+ case SPU::R29: return 29;
+ case SPU::R30: return 30;
+ case SPU::R31: return 31;
+ case SPU::R32: return 32;
+ case SPU::R33: return 33;
+ case SPU::R34: return 34;
+ case SPU::R35: return 35;
+ case SPU::R36: return 36;
+ case SPU::R37: return 37;
+ case SPU::R38: return 38;
+ case SPU::R39: return 39;
+ case SPU::R40: return 40;
+ case SPU::R41: return 41;
+ case SPU::R42: return 42;
+ case SPU::R43: return 43;
+ case SPU::R44: return 44;
+ case SPU::R45: return 45;
+ case SPU::R46: return 46;
+ case SPU::R47: return 47;
+ case SPU::R48: return 48;
+ case SPU::R49: return 49;
+ case SPU::R50: return 50;
+ case SPU::R51: return 51;
+ case SPU::R52: return 52;
+ case SPU::R53: return 53;
+ case SPU::R54: return 54;
+ case SPU::R55: return 55;
+ case SPU::R56: return 56;
+ case SPU::R57: return 57;
+ case SPU::R58: return 58;
+ case SPU::R59: return 59;
+ case SPU::R60: return 60;
+ case SPU::R61: return 61;
+ case SPU::R62: return 62;
+ case SPU::R63: return 63;
+ case SPU::R64: return 64;
+ case SPU::R65: return 65;
+ case SPU::R66: return 66;
+ case SPU::R67: return 67;
+ case SPU::R68: return 68;
+ case SPU::R69: return 69;
+ case SPU::R70: return 70;
+ case SPU::R71: return 71;
+ case SPU::R72: return 72;
+ case SPU::R73: return 73;
+ case SPU::R74: return 74;
+ case SPU::R75: return 75;
+ case SPU::R76: return 76;
+ case SPU::R77: return 77;
+ case SPU::R78: return 78;
+ case SPU::R79: return 79;
+ case SPU::R80: return 80;
+ case SPU::R81: return 81;
+ case SPU::R82: return 82;
+ case SPU::R83: return 83;
+ case SPU::R84: return 84;
+ case SPU::R85: return 85;
+ case SPU::R86: return 86;
+ case SPU::R87: return 87;
+ case SPU::R88: return 88;
+ case SPU::R89: return 89;
+ case SPU::R90: return 90;
+ case SPU::R91: return 91;
+ case SPU::R92: return 92;
+ case SPU::R93: return 93;
+ case SPU::R94: return 94;
+ case SPU::R95: return 95;
+ case SPU::R96: return 96;
+ case SPU::R97: return 97;
+ case SPU::R98: return 98;
+ case SPU::R99: return 99;
+ case SPU::R100: return 100;
+ case SPU::R101: return 101;
+ case SPU::R102: return 102;
+ case SPU::R103: return 103;
+ case SPU::R104: return 104;
+ case SPU::R105: return 105;
+ case SPU::R106: return 106;
+ case SPU::R107: return 107;
+ case SPU::R108: return 108;
+ case SPU::R109: return 109;
+ case SPU::R110: return 110;
+ case SPU::R111: return 111;
+ case SPU::R112: return 112;
+ case SPU::R113: return 113;
+ case SPU::R114: return 114;
+ case SPU::R115: return 115;
+ case SPU::R116: return 116;
+ case SPU::R117: return 117;
+ case SPU::R118: return 118;
+ case SPU::R119: return 119;
+ case SPU::R120: return 120;
+ case SPU::R121: return 121;
+ case SPU::R122: return 122;
+ case SPU::R123: return 123;
+ case SPU::R124: return 124;
+ case SPU::R125: return 125;
+ case SPU::R126: return 126;
+ case SPU::R127: return 127;
+ default:
+ cerr << "Unhandled reg in SPURegisterInfo::getRegisterNumbering!\n";
+ abort();
+ }
+}
+
+SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
+ const TargetInstrInfo &tii) :
+ SPUGenRegisterInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP),
+ Subtarget(subtarget),
+ TII(tii)
+{
+}
+
+// SPU's 128-bit registers used for argument passing:
+static const unsigned SPU_ArgRegs[] = {
+ SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
+ SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
+ SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
+ SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
+ SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
+ SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
+ SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
+ SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
+ SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
+ SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
+ SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
+};
+
+const unsigned *
+SPURegisterInfo::getArgRegs()
+{
+ return SPU_ArgRegs;
+}
+
+unsigned
+SPURegisterInfo::getNumArgRegs()
+{
+ return sizeof(SPU_ArgRegs) / sizeof(SPU_ArgRegs[0]);
+}
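+
+// (Usage sketch, for illustration only: argument lowering code would
+// typically walk this table as
+//
+//   const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
+//   for (unsigned i = 0, e = SPURegisterInfo::getNumArgRegs(); i != e; ++i)
+//     ; // assign the i-th register-passed argument to ArgRegs[i]
+//
+// where the loop body shown is hypothetical.)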
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass * SPURegisterInfo::getPointerRegClass() const
+{
+ return &SPU::R32CRegClass;
+}
+
+const unsigned *
+SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
+{
+ // Cell ABI calling convention
+ static const unsigned SPU_CalleeSaveRegs[] = {
+ SPU::R80, SPU::R81, SPU::R82, SPU::R83,
+ SPU::R84, SPU::R85, SPU::R86, SPU::R87,
+ SPU::R88, SPU::R89, SPU::R90, SPU::R91,
+ SPU::R92, SPU::R93, SPU::R94, SPU::R95,
+ SPU::R96, SPU::R97, SPU::R98, SPU::R99,
+ SPU::R100, SPU::R101, SPU::R102, SPU::R103,
+ SPU::R104, SPU::R105, SPU::R106, SPU::R107,
+ SPU::R108, SPU::R109, SPU::R110, SPU::R111,
+ SPU::R112, SPU::R113, SPU::R114, SPU::R115,
+ SPU::R116, SPU::R117, SPU::R118, SPU::R119,
+ SPU::R120, SPU::R121, SPU::R122, SPU::R123,
+ SPU::R124, SPU::R125, SPU::R126, SPU::R127,
+ SPU::R2, /* environment pointer */
+ SPU::R1, /* stack pointer */
+ SPU::R0, /* link register */
+ 0 /* end */
+ };
+
+ return SPU_CalleeSaveRegs;
+}
+
+const TargetRegisterClass* const*
+SPURegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
+{
+ // Cell ABI Calling Convention
+ static const TargetRegisterClass * const SPU_CalleeSaveRegClasses[] = {
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
+ &SPU::GPRCRegClass, /* environment pointer */
+ &SPU::GPRCRegClass, /* stack pointer */
+ &SPU::GPRCRegClass, /* link register */
+ 0 /* end */
+ };
+
+ return SPU_CalleeSaveRegClasses;
+}
+
+/*!
+ R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is
+ generally unused) are the Cell's reserved registers
+ */
+BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(SPU::R0); // LR
+ Reserved.set(SPU::R1); // SP
+ Reserved.set(SPU::R2); // environment pointer
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// needsFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+static bool needsFP(const MachineFunction &MF) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+//--------------------------------------------------------------------------
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register. This is true if the function needs a frame pointer and has
+// a non-zero stack size.
+bool
+SPURegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getStackSize() && needsFP(MF);
+}
+
+//--------------------------------------------------------------------------
+void
+SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I)
+ const
+{
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+void
+SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const
+{
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ MachineOperand &SPOp = MI.getOperand(i);
+ int FrameIndex = SPOp.getIndex();
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+
+ // Most instructions, except for generated FrameIndex additions using AIr32
+ // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the
+ // immediate in operand 2.
+ unsigned OpNo = 1;
+ if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32)
+ OpNo = 2;
+
+ MachineOperand &MO = MI.getOperand(OpNo);
+
+ // Offset is biased by $lr's slot at the bottom.
+ Offset += MO.getImm() + MFI->getStackSize() + SPUFrameInfo::minStackSize();
+ assert((Offset & 0xf) == 0
+ && "16-byte alignment violated in eliminateFrameIndex");
+
+ // Replace the FrameIndex with the base register $sp (aka $r1)
+ SPOp.ChangeToRegister(SPU::R1, false);
+ if (Offset > SPUFrameInfo::maxFrameOffset()
+ || Offset < SPUFrameInfo::minFrameOffset()) {
+ cerr << "Large stack adjustment ("
+ << Offset
+ << ") in SPURegisterInfo::eliminateFrameIndex.";
+ } else {
+ MO.ChangeToImmediate(Offset);
+ }
+}
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void
+SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const
+{
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
+ assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
+ unsigned AlignMask = Align - 1;
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
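+ // (For example, a 40-byte frame with 16-byte alignment rounds up to
+ // (40 + 15) & ~15 == 48.)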
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS)
+ const {
+ // Mark LR and SP unused, since the prolog spills them to stack and
+ // we don't want anyone else to spill them for us.
+ //
+ // Also, unless R2 is really used someday, don't spill it automatically.
+ MF.getRegInfo().setPhysRegUnused(SPU::R0);
+ MF.getRegInfo().setPhysRegUnused(SPU::R1);
+ MF.getRegInfo().setPhysRegUnused(SPU::R2);
+}
+
+void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
+{
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+ // Prepare for debug frame info.
+ bool hasDebugInfo = MMI && MMI->hasDebugInfo();
+ unsigned FrameLabelId = 0;
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ determineFrameLayout(MF);
+ int FrameSize = MFI->getStackSize();
+
+ assert((FrameSize & 0xf) == 0
+ && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
+
+ if (FrameSize > 0 || MFI->hasCalls()) {
+ FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
+ if (hasDebugInfo) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addImm(FrameLabelId);
+ }
+
+ // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
+ // for the ABI
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
+ .addReg(SPU::R1);
+ if (isS10Constant(FrameSize)) {
+ // Spill $sp to adjusted $sp
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
+ .addReg(SPU::R1);
+ // Adjust $sp by the required amount
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+ // Frame size can be loaded into ILr32, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(-16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
+ .addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ cerr << "Unhandled frame size: " << FrameSize << "\n";
+ abort();
+ }
+
+ if (hasDebugInfo) {
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+ // Show update of SP.
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == SPU::R0) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ }
+
+ // Mark effective beginning of when frame pointer is ready.
+ unsigned ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addImm(ReadyLabelId);
+
+ MachineLocation FPDst(SPU::R1);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ }
+ } else {
+ // This is a leaf function -- insert a branch hint if there is a
+ // sufficient number of instructions in the basic block. Note that
+ // this is just a best guess based on the basic block's size.
+ if (MBB.size() >= (unsigned) SPUFrameInfo::branchHintPenalty()) {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ dl = MBBI->getDebugLoc();
+
+ // Insert terminator label
+ unsigned BranchLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addImm(BranchLabelId);
+ }
+ }
+}
+
+void
+SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FrameSize = MFI->getStackSize();
+ int LinkSlotOffset = SPUFrameInfo::stackSlotSize();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ assert(MBBI->getOpcode() == SPU::RET &&
+ "Can only insert epilog into returning blocks");
+ assert((FrameSize & 0xf) == 0
+ && "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
+ if (FrameSize > 0 || MFI->hasCalls()) {
+ FrameSize = FrameSize + SPUFrameInfo::minStackSize();
+ if (isS10Constant(FrameSize + LinkSlotOffset)) {
+ // Reload $lr, adjust $sp by required amount
+ // Note: We do this to slightly improve dual issue -- not by much, but it
+ // is an opportunity for dual issue.
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(FrameSize + LinkSlotOffset)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
+ .addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+ // Frame size can be loaded into ILr32, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(16)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
+ addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ cerr << "Unhandled frame size: " << FrameSize << "\n";
+ abort();
+ }
+ }
+}
+
+unsigned
+SPURegisterInfo::getRARegister() const
+{
+ return SPU::R0;
+}
+
+unsigned
+SPURegisterInfo::getFrameRegister(MachineFunction &MF) const
+{
+ return SPU::R1;
+}
+
+void
+SPURegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const
+{
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(SPU::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+
+int
+SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ // FIXME: Most probably the dwarf numbers differ for Linux and Darwin
+ return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+#include "SPUGenRegisterInfo.inc"
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
new file mode 100644
index 000000000000..5b6e9ec68cdb
--- /dev/null
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -0,0 +1,101 @@
+//===- SPURegisterInfo.h - Cell SPU Register Information Impl ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTERINFO_H
+#define SPU_REGISTERINFO_H
+
+#include "SPU.h"
+#include "SPUGenRegisterInfo.h.inc"
+
+namespace llvm {
+ class SPUSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+ class SPURegisterInfo : public SPUGenRegisterInfo {
+ private:
+ const SPUSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ //! Predicate: Does the machine function use the link register?
+ bool usesLR(MachineFunction &MF) const;
+
+ public:
+ SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
+
+ //! Translate a register's enum value to a register number
+ /*!
+ This method translates a register's enum value to its register number,
+ e.g. SPU::R14 -> 14.
+ */
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *getPointerRegClass() const;
+
+ //! Return the array of callee-saved registers
+ virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
+
+ //! Return the register class array of the callee-saved registers
+ virtual const TargetRegisterClass* const *
+ getCalleeSavedRegClasses(const MachineFunction *MF) const;
+
+ //! Return the reserved registers
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ //! Predicate: Target has a dedicated frame pointer
+ bool hasFP(const MachineFunction &MF) const;
+ //! Eliminate the call frame setup pseudo-instructions
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+ //! Convert frame indices into machine operands
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int,
+ RegScavenger *RS) const;
+ //! Determine the frame's layout
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ //! Emit the function prologue
+ void emitPrologue(MachineFunction &MF) const;
+ //! Emit the function epilogue
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ //! Get return address register (LR, aka R0)
+ unsigned getRARegister() const;
+ //! Get the stack frame register (SP, aka R1)
+ unsigned getFrameRegister(MachineFunction &MF) const;
+ //! Perform target-specific stack frame setup.
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ //------------------------------------------------------------------------
+ // New methods added:
+ //------------------------------------------------------------------------
+
+ //! Return the array of argument passing registers
+ /*!
+ \note The size of this array is returned by getNumArgRegs().
+ */
+ static const unsigned *getArgRegs();
+
+ //! Return the size of the argument passing register array
+ static unsigned getNumArgRegs();
+
+ //! Get DWARF debugging register number
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ };
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
new file mode 100644
index 000000000000..bb88f2bf9a29
--- /dev/null
+++ b/lib/Target/CellSPU/SPURegisterInfo.td
@@ -0,0 +1,429 @@
+//===- SPURegisterInfo.td - The Cell SPU Register File -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class SPUReg<string n> : Register<n> {
+ let Namespace = "SPU";
+}
+
+// The SPU's registers are all 128 bits wide, which makes specifying the
+// registers relatively easy, if relatively mundane:
+
+class SPUVecReg<bits<7> num, string n> : SPUReg<n> {
+ field bits<7> Num = num;
+}
+
+def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>;
+def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>;
+def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>;
+def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>;
+def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>;
+def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>;
+def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>;
+def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>;
+def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>;
+def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>;
+def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>;
+def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>;
+def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>;
+def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>;
+def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>;
+def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>;
+def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>;
+def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>;
+def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>;
+def R19 : SPUVecReg<19, "$19">, DwarfRegNum<[19]>;
+def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>;
+def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>;
+def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>;
+def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>;
+def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>;
+def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>;
+def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>;
+def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>;
+def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>;
+def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>;
+def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>;
+def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>;
+def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>;
+def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>;
+def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>;
+def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>;
+def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>;
+def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>;
+def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>;
+def R39 : SPUVecReg<39, "$39">, DwarfRegNum<[39]>;
+def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>;
+def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>;
+def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>;
+def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>;
+def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>;
+def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>;
+def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>;
+def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>;
+def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>;
+def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>;
+def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>;
+def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>;
+def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>;
+def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>;
+def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>;
+def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>;
+def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>;
+def R57 : SPUVecReg<57, "$57">, DwarfRegNum<[57]>;
+def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>;
+def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>;
+def R60 : SPUVecReg<60, "$60">, DwarfRegNum<[60]>;
+def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>;
+def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>;
+def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>;
+def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>;
+def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>;
+def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>;
+def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>;
+def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>;
+def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>;
+def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>;
+def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>;
+def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>;
+def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>;
+def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>;
+def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>;
+def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>;
+def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>;
+def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>;
+def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>;
+def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>;
+def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>;
+def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>;
+def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>;
+def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>;
+def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>;
+def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>;
+def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>;
+def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>;
+def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>;
+def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>;
+def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>;
+def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>;
+def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>;
+def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>;
+def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>;
+def R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>;
+def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>;
+def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>;
+def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>;
+def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>;
+def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>;
+def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>;
+def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>;
+def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>;
+def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>;
+def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>;
+def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>;
+def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>;
+def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>;
+def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>;
+def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>;
+def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>;
+def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>;
+def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>;
+def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>;
+def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>;
+def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>;
+def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>;
+def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>;
+def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>;
+def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>;
+def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>;
+def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>;
+def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>;
+def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>;
+def R126 : SPUVecReg<126, "$126">, DwarfRegNum<[126]>;
+def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>;
+
+/* Need floating point status register here: */
+/* def FPCSR : ... */
+
+// The SPU's registers are 128-bit wide entities that can function as general
+// purpose registers, where the operands are in the "preferred slot":
+def GPRC : RegisterClass<"SPU", [i128], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GPRCClass::iterator
+ GPRCClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ GPRCClass::iterator
+ GPRCClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as 64-bit wide (double word integer) "preferred slot":
+def R64C : RegisterClass<"SPU", [i64], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ R64CClass::iterator
+ R64CClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ R64CClass::iterator
+ R64CClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as 64-bit wide (double word) FP "preferred slot":
+def R64FP : RegisterClass<"SPU", [f64], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ R64FPClass::iterator
+ R64FPClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ R64FPClass::iterator
+ R64FPClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as 32-bit wide (word) "preferred slot":
+def R32C : RegisterClass<"SPU", [i32], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ R32CClass::iterator
+ R32CClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ R32CClass::iterator
+ R32CClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as single precision floating point "preferred slot":
+def R32FP : RegisterClass<"SPU", [f32], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ R32FPClass::iterator
+ R32FPClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ R32FPClass::iterator
+ R32FPClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as 16-bit wide (halfword) "preferred slot":
+def R16C : RegisterClass<"SPU", [i16], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ R16CClass::iterator
+ R16CClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ R16CClass::iterator
+ R16CClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as 8-bit wide (byte) "preferred slot":
+def R8C : RegisterClass<"SPU", [i8], 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ R8CClass::iterator
+ R8CClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ R8CClass::iterator
+ R8CClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
+
+// The SPU's registers as vector registers:
+def VECREG : RegisterClass<"SPU",
+ [v16i8,v8i16,v2i32,v4i32,v4f32,v2i64,v2f64],
+ 128,
+ [
+ /* volatile register */
+ R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
+ R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
+ R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
+ R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
+ R77, R78, R79,
+ /* non-volatile register: take hint from PPC and allocate in reverse order */
+ R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
+ R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
+ R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
+ R86, R85, R84, R83, R82, R81, R80,
+ /* environment ptr, SP, LR */
+ R2, R1, R0 ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ VECREGClass::iterator
+ VECREGClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ VECREGClass::iterator
+ VECREGClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-3; // don't allocate R2, R1, or R0 (envp, sp, lr)
+ }
+ }];
+}
diff --git a/lib/Target/CellSPU/SPURegisterNames.h b/lib/Target/CellSPU/SPURegisterNames.h
new file mode 100644
index 000000000000..6c3afdf41fdc
--- /dev/null
+++ b/lib/Target/CellSPU/SPURegisterNames.h
@@ -0,0 +1,18 @@
+//===- SPURegisterNames.h - Wrapper header for SPU register names -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTER_NAMES_H
+#define SPU_REGISTER_NAMES_H
+
+// Define symbolic names for Cell registers. This defines a mapping from
+// register name to register number.
+//
+#include "SPUGenRegisterNames.inc"
+
+#endif
diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td
new file mode 100644
index 000000000000..785dc4660110
--- /dev/null
+++ b/lib/Target/CellSPU/SPUSchedule.td
@@ -0,0 +1,57 @@
+//===- SPUSchedule.td - Cell Scheduling Definitions --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Even pipeline:
+
+def EVEN_UNIT : FuncUnit; // Even execution unit: (PC & 0x7 == 000)
+def ODD_UNIT : FuncUnit; // Odd execution unit: (PC & 0x7 == 100)
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for Cell SPU
+//===----------------------------------------------------------------------===//
+
+def LoadStore : InstrItinClass; // ODD_UNIT
+def BranchHints : InstrItinClass; // ODD_UNIT
+def BranchResolv : InstrItinClass; // ODD_UNIT
+def ChanOpSPR : InstrItinClass; // ODD_UNIT
+def ShuffleOp : InstrItinClass; // ODD_UNIT
+def SelectOp : InstrItinClass; // ODD_UNIT
+def GatherOp : InstrItinClass; // ODD_UNIT
+def LoadNOP : InstrItinClass; // ODD_UNIT
+def ExecNOP : InstrItinClass; // EVEN_UNIT
+def SPrecFP : InstrItinClass; // EVEN_UNIT
+def DPrecFP : InstrItinClass; // EVEN_UNIT
+def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer)
+def ByteOp : InstrItinClass; // EVEN_UNIT
+def IntegerOp : InstrItinClass; // EVEN_UNIT
+def IntegerMulDiv: InstrItinClass; // EVEN_UNIT
+def RotateShift : InstrItinClass; // EVEN_UNIT
+def ImmLoad : InstrItinClass; // EVEN_UNIT
+
+/* Note: The itinerary for the Cell SPU is somewhat contrived... */
+def SPUItineraries : ProcessorItineraries<[
+ InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<ChanOpSPR , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<ShuffleOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<SelectOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<GatherOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<LoadNOP , [InstrStage<1, [ODD_UNIT]>]>,
+ InstrItinData<ExecNOP , [InstrStage<1, [EVEN_UNIT]>]>,
+ InstrItinData<SPrecFP , [InstrStage<6, [EVEN_UNIT]>]>,
+ InstrItinData<DPrecFP , [InstrStage<13, [EVEN_UNIT]>]>,
+ InstrItinData<FPInt , [InstrStage<2, [EVEN_UNIT]>]>,
+ InstrItinData<ByteOp , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<IntegerOp , [InstrStage<2, [EVEN_UNIT]>]>,
+ InstrItinData<RotateShift , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<IntegerMulDiv,[InstrStage<7, [EVEN_UNIT]>]>,
+ InstrItinData<ImmLoad , [InstrStage<2, [EVEN_UNIT]>]>
+ ]>;
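+
+// For illustration only (hypothetical, no such def exists in this file): an
+// instruction definition elsewhere in the target names one of the classes
+// above as its itinerary operand, e.g. roughly
+//
+//   def A : RRForm<..., (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+//                  "a\t$rT, $rA, $rB", IntegerOp, [/* pattern */]>;
+//
+// so the scheduler models the instruction as occupying EVEN_UNIT for two
+// cycles (the IntegerOp entry above).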
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
new file mode 100644
index 000000000000..0a1c2f75cfe5
--- /dev/null
+++ b/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -0,0 +1,40 @@
+//===- SPUSubtarget.cpp - STI Cell SPU Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CellSPU-specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUSubtarget.h"
+#include "SPU.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "SPUGenSubtarget.inc"
+
+using namespace llvm;
+
+SPUSubtarget::SPUSubtarget(const TargetMachine &tm, const Module &M,
+ const std::string &FS) :
+ TM(tm),
+ StackAlignment(16),
+ ProcDirective(SPU::DEFAULT_PROC),
+ UseLargeMem(false)
+{
+  // This should be the target SPU processor type. For now, since there's
+  // only one, simply default to "v0":
+ std::string default_cpu("v0");
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, default_cpu);
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void SPUSubtarget::SetJITMode() {
+}
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
new file mode 100644
index 000000000000..b6a34099b2f7
--- /dev/null
+++ b/lib/Target/CellSPU/SPUSubtarget.h
@@ -0,0 +1,95 @@
+//===-- SPUSubtarget.h - Define Subtarget for the Cell SPU ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Cell SPU-specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSUBTARGET_H
+#define CELLSUBTARGET_H
+
+#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+ class Module;
+ class GlobalValue;
+ class TargetMachine;
+
+ namespace SPU {
+ enum {
+ PROC_NONE,
+ DEFAULT_PROC
+ };
+ }
+
+ class SPUSubtarget : public TargetSubtarget {
+ protected:
+ const TargetMachine &TM;
+
+ /// stackAlignment - The minimum alignment known to hold of the stack frame
+ /// on entry to the function and which must be maintained by every function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which SPU processor (this isn't really used, but it's there to keep
+ /// the C compiler happy)
+ unsigned ProcDirective;
+
+ /// Use (assume) large memory -- effectively disables the LQA/STQA
+    /// instructions that assume a 256K local store.
+ bool UseLargeMem;
+
+ public:
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ SPUSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by
+ /// every function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+    /// getInstrItineraryData - Return the instruction itineraries based on
+    /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+ /// Use large memory addressing predicate
+ bool usingLargeMem() const {
+ return UseLargeMem;
+ }
+
+ /// getTargetDataString - Return the pointer size and type alignment
+ /// properties of this subtarget.
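+    /// The string below describes a big-endian target ("E") with 32-bit
+    /// pointers and 128-bit preferred alignment for pointer, integer, float,
+    /// aggregate and vector types, matching the SPU's 16-byte registers and
+    /// quadword local-store accesses.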
+ const char *getTargetDataString() const {
+ return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
+ "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:128:128-v128:128:128"
+ "-s:128:128";
+ }
+ };
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
new file mode 100644
index 000000000000..ff88ed810716
--- /dev/null
+++ b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
@@ -0,0 +1,74 @@
+//===-- SPUTargetAsmInfo.cpp - Cell SPU asm properties ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SPUTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUTargetAsmInfo.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) :
+ SPUTargetAsmInfo<ELFTargetAsmInfo>(TM) {
+ PCSymbol = ".";
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ // This corresponds to what the gcc SPU compiler emits, for consistency.
+ CStringSection = ".rodata.str";
+
+ // Has leb128, .loc and .file
+ HasLEB128 = true;
+ HasDotLocAndDotFile = true;
+
+ // BSS section needs to be emitted as ".section"
+ BSSSection = "\t.section\t.bss";
+ BSSSection_ = getUnnamedSection("\t.section\t.bss",
+ SectionFlags::Writeable | SectionFlags::BSS,
+ true);
+
+ SupportsDebugInformation = true;
+ NeedsSet = true;
+ SupportsMacInfoSection = false;
+ DwarfAbbrevSection = "\t.section .debug_abbrev,\"\",@progbits";
+ DwarfInfoSection = "\t.section .debug_info,\"\",@progbits";
+ DwarfLineSection = "\t.section .debug_line,\"\",@progbits";
+ DwarfFrameSection = "\t.section .debug_frame,\"\",@progbits";
+ DwarfPubNamesSection = "\t.section .debug_pubnames,\"\",@progbits";
+  DwarfPubTypesSection = "\t.section .debug_pubtypes,\"\",@progbits";
+ DwarfStrSection = "\t.section .debug_str,\"MS\",@progbits,1";
+ DwarfLocSection = "\t.section .debug_loc,\"\",@progbits";
+ DwarfARangesSection = "\t.section .debug_aranges,\"\",@progbits";
+ DwarfRangesSection = "\t.section .debug_ranges,\"\",@progbits";
+  DwarfMacInfoSection = "\t.section .debug_macinfo,\"\",@progbits";
+
+ // Exception handling is not supported on CellSPU (think about it: you only
+ // have 256K for code+data. Would you support exception handling?)
+ SupportsExceptionHandling = false;
+}
+
+/// PreferredEHDataFormat - This hook allows the target to select data
+/// format used for encoding pointers in exception handling data. Reason is
+/// 0 for data, 1 for code labels, 2 for function pointers. Global is true
+/// if the symbol can be relocated.
+unsigned
+SPULinuxTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ // We really need to write something here.
+ return TargetAsmInfo::PreferredEHDataFormat(Reason, Global);
+}
+
+// Instantiate default implementation.
+TEMPLATE_INSTANTIATION(class SPUTargetAsmInfo<TargetAsmInfo>);
diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.h b/lib/Target/CellSPU/SPUTargetAsmInfo.h
new file mode 100644
index 000000000000..d10a56540592
--- /dev/null
+++ b/lib/Target/CellSPU/SPUTargetAsmInfo.h
@@ -0,0 +1,51 @@
+//===-- SPUTargetAsmInfo.h - Cell SPU asm properties -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SPUTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUTARGETASMINFO_H
+#define SPUTARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+#include "SPUTargetMachine.h"
+#include "SPUSubtarget.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class SPUTargetMachine;
+
+ template <class BaseTAI>
+ struct SPUTargetAsmInfo : public BaseTAI {
+ explicit SPUTargetAsmInfo(const SPUTargetMachine &TM):
+ BaseTAI(TM) {
+ /* (unused today)
+ * const SPUSubtarget *Subtarget = &TM.getSubtarget<SPUSubtarget>(); */
+
+ BaseTAI::ZeroDirective = "\t.space\t";
+ BaseTAI::SetDirective = "\t.set";
+ BaseTAI::Data64bitsDirective = "\t.quad\t";
+ BaseTAI::AlignmentIsInBytes = false;
+ BaseTAI::LCOMMDirective = "\t.lcomm\t";
+ BaseTAI::InlineAsmStart = "# InlineAsm Start";
+ BaseTAI::InlineAsmEnd = "# InlineAsm End";
+ }
+ };
+
+ struct SPULinuxTargetAsmInfo : public SPUTargetAsmInfo<ELFTargetAsmInfo> {
+ explicit SPULinuxTargetAsmInfo(const SPUTargetMachine &TM);
+ virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const;
+ };
+} // namespace llvm
+
+#endif /* SPUTARGETASMINFO_H */
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
new file mode 100644
index 000000000000..7fa902243c7b
--- /dev/null
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -0,0 +1,98 @@
+//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPURegisterNames.h"
+#include "SPUTargetAsmInfo.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+
+using namespace llvm;
+
+/// CellSPUTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int CellSPUTargetMachineModule;
+int CellSPUTargetMachineModule = 0;
+
+namespace {
+ // Register the targets
+ RegisterTarget<SPUTargetMachine>
+ CELLSPU("cellspu", "STI CBEA Cell SPU [experimental]");
+}
+
+const std::pair<unsigned, int> *
+SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
+ NumEntries = 1;
+ return &LR[0];
+}
+
+const TargetAsmInfo *
+SPUTargetMachine::createTargetAsmInfo() const
+{
+ return new SPULinuxTargetAsmInfo(*this);
+}
+
+unsigned
+SPUTargetMachine::getModuleMatchQuality(const Module &M)
+{
+ // We strongly match "spu-*" or "cellspu-*".
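+  // For example, triples such as "spu-unknown-elf" or "cellspu-unknown-elf"
+  // (and the bare "spu"/"cellspu") all return a match quality of 20.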
+ std::string TT = M.getTargetTriple();
+ if ((TT.size() == 3 && std::string(TT.begin(), TT.begin()+3) == "spu")
+ || (TT.size() == 7 && std::string(TT.begin(), TT.begin()+7) == "cellspu")
+ || (TT.size() >= 4 && std::string(TT.begin(), TT.begin()+4) == "spu-")
+ || (TT.size() >= 8 && std::string(TT.begin(), TT.begin()+8) == "cellspu-"))
+ return 20;
+
+ return 0; // No match at all...
+}
+
+SPUTargetMachine::SPUTargetMachine(const Module &M, const std::string &FS)
+ : Subtarget(*this, M, FS),
+ DataLayout(Subtarget.getTargetDataString()),
+ InstrInfo(*this),
+ FrameInfo(*this),
+ TLInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData())
+{
+ // For the time being, use static relocations, since there's really no
+ // support for PIC yet.
+ setRelocationModel(Reloc::Static);
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool
+SPUTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel)
+{
+ // Install an instruction selector.
+ PM.add(createSPUISelDag(*this));
+ return false;
+}
+
+bool SPUTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ PM.add(createSPUAsmPrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
new file mode 100644
index 000000000000..cd3920333851
--- /dev/null
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -0,0 +1,95 @@
+//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CellSPU-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_TARGETMACHINE_H
+#define SPU_TARGETMACHINE_H
+
+#include "SPUSubtarget.h"
+#include "SPUInstrInfo.h"
+#include "SPUISelLowering.h"
+#include "SPUFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+class PassManager;
+class GlobalValue;
+class TargetFrameInfo;
+
+/// SPUTargetMachine
+///
+class SPUTargetMachine : public LLVMTargetMachine {
+ SPUSubtarget Subtarget;
+ const TargetData DataLayout;
+ SPUInstrInfo InstrInfo;
+ SPUFrameInfo FrameInfo;
+ SPUTargetLowering TLInfo;
+ InstrItineraryData InstrItins;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ SPUTargetMachine(const Module &M, const std::string &FS);
+
+ /// Return the subtarget implementation object
+ virtual const SPUSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const SPUInstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual const SPUFrameInfo *getFrameInfo() const {
+ return &FrameInfo;
+ }
+ /*!
+ \note Cell SPU does not support JIT today. It could support JIT at some
+ point.
+ */
+ virtual TargetJITInfo *getJITInfo() {
+ return NULL;
+ }
+
+ //! Module match function
+ /*!
+ Module matching function called by TargetMachineRegistry().
+ */
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ virtual SPUTargetLowering *getTargetLowering() const {
+ return const_cast<SPUTargetLowering*>(&TLInfo);
+ }
+
+ virtual const SPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const TargetData *getTargetData() const {
+ return &DataLayout;
+ }
+
+ virtual const InstrItineraryData getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt
new file mode 100644
index 000000000000..f8182b80c94c
--- /dev/null
+++ b/lib/Target/CppBackend/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_target(CppBackend
+ CPPBackend.cpp
+ )
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
new file mode 100644
index 000000000000..4082989c4c48
--- /dev/null
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -0,0 +1,2007 @@
+//===-- CPPBackend.cpp - Library for converting LLVM code to C++ code -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the writing of the LLVM IR as a set of C++ calls to the
+// LLVM IR interface. The input module is assumed to be verified.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPPTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Config/config.h"
+#include <algorithm>
+#include <set>
+
+using namespace llvm;
+
+static cl::opt<std::string>
+FuncName("cppfname", cl::desc("Specify the name of the generated function"),
+ cl::value_desc("function name"));
+
+enum WhatToGenerate {
+ GenProgram,
+ GenModule,
+ GenContents,
+ GenFunction,
+ GenFunctions,
+ GenInline,
+ GenVariable,
+ GenType
+};
+
+static cl::opt<WhatToGenerate> GenerationType("cppgen", cl::Optional,
+ cl::desc("Choose what kind of output to generate"),
+ cl::init(GenProgram),
+ cl::values(
+ clEnumValN(GenProgram, "program", "Generate a complete program"),
+ clEnumValN(GenModule, "module", "Generate a module definition"),
+ clEnumValN(GenContents, "contents", "Generate contents of a module"),
+ clEnumValN(GenFunction, "function", "Generate a function definition"),
+ clEnumValN(GenFunctions,"functions", "Generate all function definitions"),
+ clEnumValN(GenInline, "inline", "Generate an inline function"),
+ clEnumValN(GenVariable, "variable", "Generate a variable definition"),
+ clEnumValN(GenType, "type", "Generate a type definition"),
+ clEnumValEnd
+ )
+);
+
+static cl::opt<std::string> NameToGenerate("cppfor", cl::Optional,
+ cl::desc("Specify the name of the thing to generate"),
+ cl::init("!bad!"));
+
+/// CppBackendTargetMachineModule - Note that this is used on hosts
+/// that cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int CppBackendTargetMachineModule;
+int CppBackendTargetMachineModule = 0;
+
+// Register the target.
+static RegisterTarget<CPPTargetMachine> X("cpp", "C++ backend");
+
+namespace {
+ typedef std::vector<const Type*> TypeList;
+ typedef std::map<const Type*,std::string> TypeMap;
+ typedef std::map<const Value*,std::string> ValueMap;
+ typedef std::set<std::string> NameSet;
+ typedef std::set<const Type*> TypeSet;
+ typedef std::set<const Value*> ValueSet;
+ typedef std::map<const Value*,std::string> ForwardRefMap;
+
+ /// CppWriter - This class is the main chunk of code that converts an LLVM
+ /// module to a C++ translation unit.
+ class CppWriter : public ModulePass {
+ raw_ostream &Out;
+ const Module *TheModule;
+ uint64_t uniqueNum;
+ TypeMap TypeNames;
+ ValueMap ValueNames;
+ TypeMap UnresolvedTypes;
+ TypeList TypeStack;
+ NameSet UsedNames;
+ TypeSet DefinedTypes;
+ ValueSet DefinedValues;
+ ForwardRefMap ForwardRefs;
+ bool is_inline;
+
+ public:
+ static char ID;
+ explicit CppWriter(raw_ostream &o) :
+ ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false) {}
+
+ virtual const char *getPassName() const { return "C++ backend"; }
+
+ bool runOnModule(Module &M);
+
+ void printProgram(const std::string& fname, const std::string& modName );
+ void printModule(const std::string& fname, const std::string& modName );
+ void printContents(const std::string& fname, const std::string& modName );
+ void printFunction(const std::string& fname, const std::string& funcName );
+ void printFunctions();
+ void printInline(const std::string& fname, const std::string& funcName );
+ void printVariable(const std::string& fname, const std::string& varName );
+ void printType(const std::string& fname, const std::string& typeName );
+
+ void error(const std::string& msg);
+
+ private:
+ void printLinkageType(GlobalValue::LinkageTypes LT);
+ void printVisibilityType(GlobalValue::VisibilityTypes VisTypes);
+ void printCallingConv(unsigned cc);
+ void printEscapedString(const std::string& str);
+ void printCFP(const ConstantFP* CFP);
+
+ std::string getCppName(const Type* val);
+ inline void printCppName(const Type* val);
+
+ std::string getCppName(const Value* val);
+ inline void printCppName(const Value* val);
+
+ void printAttributes(const AttrListPtr &PAL, const std::string &name);
+ bool printTypeInternal(const Type* Ty);
+ inline void printType(const Type* Ty);
+ void printTypes(const Module* M);
+
+ void printConstant(const Constant *CPV);
+ void printConstants(const Module* M);
+
+ void printVariableUses(const GlobalVariable *GV);
+ void printVariableHead(const GlobalVariable *GV);
+ void printVariableBody(const GlobalVariable *GV);
+
+ void printFunctionUses(const Function *F);
+ void printFunctionHead(const Function *F);
+ void printFunctionBody(const Function *F);
+ void printInstruction(const Instruction *I, const std::string& bbname);
+ std::string getOpName(Value*);
+
+ void printModuleBody();
+ };
+
+ static unsigned indent_level = 0;
+ inline raw_ostream& nl(raw_ostream& Out, int delta = 0) {
+ Out << "\n";
+ if (delta >= 0 || indent_level >= unsigned(-delta))
+ indent_level += delta;
+ for (unsigned i = 0; i < indent_level; ++i)
+ Out << " ";
+ return Out;
+ }
+
+ inline void in() { indent_level++; }
+ inline void out() { if (indent_level >0) indent_level--; }
+
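+  // For example, sanitize() turns "foo.bar-1" into "foo_bar_1" so the result
+  // can be embedded in a generated C++ identifier.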
+ inline void
+ sanitize(std::string& str) {
+ for (size_t i = 0; i < str.length(); ++i)
+ if (!isalnum(str[i]) && str[i] != '_')
+ str[i] = '_';
+ }
+
+ inline std::string
+ getTypePrefix(const Type* Ty ) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "void_";
+ case Type::IntegerTyID:
+ return std::string("int") + utostr(cast<IntegerType>(Ty)->getBitWidth()) +
+ "_";
+ case Type::FloatTyID: return "float_";
+ case Type::DoubleTyID: return "double_";
+ case Type::LabelTyID: return "label_";
+ case Type::FunctionTyID: return "func_";
+ case Type::StructTyID: return "struct_";
+ case Type::ArrayTyID: return "array_";
+ case Type::PointerTyID: return "ptr_";
+ case Type::VectorTyID: return "packed_";
+ case Type::OpaqueTyID: return "opaque_";
+ default: return "other_";
+ }
+ return "unknown_";
+ }
+
+ // Looks up the type in the symbol table and returns a pointer to its name or
+ // a null pointer if it wasn't found. Note that this isn't the same as the
+  // Module::getTypeName function, which will return an empty string, not a null
+ // pointer if the name is not found.
+ inline const std::string*
+ findTypeName(const TypeSymbolTable& ST, const Type* Ty) {
+ TypeSymbolTable::const_iterator TI = ST.begin();
+ TypeSymbolTable::const_iterator TE = ST.end();
+ for (;TI != TE; ++TI)
+ if (TI->second == Ty)
+ return &(TI->first);
+ return 0;
+ }
+
+ void CppWriter::error(const std::string& msg) {
+ cerr << msg << "\n";
+ exit(2);
+ }
+
+ // printCFP - Print a floating point constant .. very carefully :)
+ // This makes sure that conversion to/from floating yields the same binary
+ // result so that we don't lose precision.
+ void CppWriter::printCFP(const ConstantFP *CFP) {
+ bool ignored;
+ APFloat APF = APFloat(CFP->getValueAPF()); // copy
+ if (CFP->getType() == Type::FloatTy)
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+ Out << "ConstantFP::get(";
+ Out << "APFloat(";
+#if HAVE_PRINTF_A
+ char Buffer[100];
+ sprintf(Buffer, "%A", APF.convertToDouble());
+ if ((!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3)) &&
+ APF.bitwiseIsEqual(APFloat(atof(Buffer)))) {
+ if (CFP->getType() == Type::DoubleTy)
+ Out << "BitsToDouble(" << Buffer << ")";
+ else
+ Out << "BitsToFloat((float)" << Buffer << ")";
+ Out << ")";
+ } else {
+#endif
+ std::string StrVal = ftostr(CFP->getValueAPF());
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like
+ // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex.
+ if (((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) &&
+ (CFP->isExactlyValue(atof(StrVal.c_str())))) {
+ if (CFP->getType() == Type::DoubleTy)
+ Out << StrVal;
+ else
+ Out << StrVal << "f";
+ } else if (CFP->getType() == Type::DoubleTy)
+ Out << "BitsToDouble(0x"
+ << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue())
+ << "ULL) /* " << StrVal << " */";
+ else
+ Out << "BitsToFloat(0x"
+ << utohexstr((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue())
+ << "U) /* " << StrVal << " */";
+ Out << ")";
+#if HAVE_PRINTF_A
+ }
+#endif
+ Out << ")";
+ }
+
+ void CppWriter::printCallingConv(unsigned cc){
+ // Print the calling convention.
+ switch (cc) {
+ case CallingConv::C: Out << "CallingConv::C"; break;
+ case CallingConv::Fast: Out << "CallingConv::Fast"; break;
+ case CallingConv::Cold: Out << "CallingConv::Cold"; break;
+ case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break;
+ default: Out << cc; break;
+ }
+ }
+
+ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
+ switch (LT) {
+ case GlobalValue::InternalLinkage:
+ Out << "GlobalValue::InternalLinkage"; break;
+ case GlobalValue::PrivateLinkage:
+ Out << "GlobalValue::PrivateLinkage"; break;
+ case GlobalValue::AvailableExternallyLinkage:
+ Out << "GlobalValue::AvailableExternallyLinkage "; break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ Out << "GlobalValue::LinkOnceAnyLinkage "; break;
+ case GlobalValue::LinkOnceODRLinkage:
+ Out << "GlobalValue::LinkOnceODRLinkage "; break;
+ case GlobalValue::WeakAnyLinkage:
+ Out << "GlobalValue::WeakAnyLinkage"; break;
+ case GlobalValue::WeakODRLinkage:
+ Out << "GlobalValue::WeakODRLinkage"; break;
+ case GlobalValue::AppendingLinkage:
+ Out << "GlobalValue::AppendingLinkage"; break;
+ case GlobalValue::ExternalLinkage:
+ Out << "GlobalValue::ExternalLinkage"; break;
+ case GlobalValue::DLLImportLinkage:
+ Out << "GlobalValue::DLLImportLinkage"; break;
+ case GlobalValue::DLLExportLinkage:
+ Out << "GlobalValue::DLLExportLinkage"; break;
+ case GlobalValue::ExternalWeakLinkage:
+ Out << "GlobalValue::ExternalWeakLinkage"; break;
+ case GlobalValue::GhostLinkage:
+ Out << "GlobalValue::GhostLinkage"; break;
+ case GlobalValue::CommonLinkage:
+ Out << "GlobalValue::CommonLinkage"; break;
+ }
+ }
+
+ void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
+ switch (VisType) {
+ default: assert(0 && "Unknown GVar visibility");
+ case GlobalValue::DefaultVisibility:
+ Out << "GlobalValue::DefaultVisibility";
+ break;
+ case GlobalValue::HiddenVisibility:
+ Out << "GlobalValue::HiddenVisibility";
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Out << "GlobalValue::ProtectedVisibility";
+ break;
+ }
+ }
+
+ // printEscapedString - Print each character of the specified string, escaping
+ // it if it is not printable or if it is an escape char.
+ void CppWriter::printEscapedString(const std::string &Str) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '"' && C != '\\') {
+ Out << C;
+ } else {
+ Out << "\\x"
+ << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
+ << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ }
+ }
+ }
+
+ std::string CppWriter::getCppName(const Type* Ty) {
+ // First, handle the primitive types .. easy
+ if (Ty->isPrimitiveType() || Ty->isInteger()) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "Type::VoidTy";
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+ return "IntegerType::get(" + utostr(BitWidth) + ")";
+ }
+ case Type::X86_FP80TyID: return "Type::X86_FP80Ty";
+ case Type::FloatTyID: return "Type::FloatTy";
+ case Type::DoubleTyID: return "Type::DoubleTy";
+ case Type::LabelTyID: return "Type::LabelTy";
+ default:
+ error("Invalid primitive type");
+ break;
+ }
+ return "Type::VoidTy"; // shouldn't be returned, but make it sensible
+ }
+
+ // Now, see if we've seen the type before and return that
+ TypeMap::iterator I = TypeNames.find(Ty);
+ if (I != TypeNames.end())
+ return I->second;
+
+ // Okay, let's build a new name for this type. Start with a prefix
+ const char* prefix = 0;
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: prefix = "FuncTy_"; break;
+ case Type::StructTyID: prefix = "StructTy_"; break;
+ case Type::ArrayTyID: prefix = "ArrayTy_"; break;
+ case Type::PointerTyID: prefix = "PointerTy_"; break;
+ case Type::OpaqueTyID: prefix = "OpaqueTy_"; break;
+ case Type::VectorTyID: prefix = "VectorTy_"; break;
+ default: prefix = "OtherTy_"; break; // prevent breakage
+ }
+
+    // See if the type has a name in the symbol table and build accordingly.
+ const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty);
+ std::string name;
+ if (tName)
+ name = std::string(prefix) + *tName;
+ else
+ name = std::string(prefix) + utostr(uniqueNum++);
+ sanitize(name);
+
+ // Save the name
+ return TypeNames[Ty] = name;
+ }
+
+ void CppWriter::printCppName(const Type* Ty) {
+ printEscapedString(getCppName(Ty));
+ }
+
+ std::string CppWriter::getCppName(const Value* val) {
+ std::string name;
+ ValueMap::iterator I = ValueNames.find(val);
+ if (I != ValueNames.end() && I->first == val)
+ return I->second;
+
+ if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
+ name = std::string("gvar_") +
+ getTypePrefix(GV->getType()->getElementType());
+ } else if (isa<Function>(val)) {
+ name = std::string("func_");
+ } else if (const Constant* C = dyn_cast<Constant>(val)) {
+ name = std::string("const_") + getTypePrefix(C->getType());
+ } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
+ if (is_inline) {
+ unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
+ Function::const_arg_iterator(Arg)) + 1;
+ name = std::string("arg_") + utostr(argNum);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+ } else {
+ name = getTypePrefix(val->getType());
+ }
+ } else {
+ name = getTypePrefix(val->getType());
+ }
+ name += (val->hasName() ? val->getName() : utostr(uniqueNum++));
+ sanitize(name);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+ }
+
+ void CppWriter::printCppName(const Value* val) {
+ printEscapedString(getCppName(val));
+ }
+
+ void CppWriter::printAttributes(const AttrListPtr &PAL,
+ const std::string &name) {
+ Out << "AttrListPtr " << name << "_PAL;";
+ nl(Out);
+ if (!PAL.isEmpty()) {
+ Out << '{'; in(); nl(Out);
+ Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
+ Out << "AttributeWithIndex PAWI;"; nl(Out);
+ for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
+ unsigned index = PAL.getSlot(i).Index;
+ Attributes attrs = PAL.getSlot(i).Attrs;
+ Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 ";
+#define HANDLE_ATTR(X) \
+ if (attrs & Attribute::X) \
+ Out << " | Attribute::" #X; \
+ attrs &= ~Attribute::X;
+
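+      // For example, HANDLE_ATTR(SExt) expands to:
+      //   if (attrs & Attribute::SExt)
+      //     Out << " | Attribute::SExt";
+      //   attrs &= ~Attribute::SExt;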
+ HANDLE_ATTR(SExt);
+ HANDLE_ATTR(ZExt);
+ HANDLE_ATTR(NoReturn);
+ HANDLE_ATTR(InReg);
+ HANDLE_ATTR(StructRet);
+ HANDLE_ATTR(NoUnwind);
+ HANDLE_ATTR(NoAlias);
+ HANDLE_ATTR(ByVal);
+ HANDLE_ATTR(Nest);
+ HANDLE_ATTR(ReadNone);
+ HANDLE_ATTR(ReadOnly);
+ HANDLE_ATTR(NoInline);
+ HANDLE_ATTR(AlwaysInline);
+ HANDLE_ATTR(OptimizeForSize);
+ HANDLE_ATTR(StackProtect);
+ HANDLE_ATTR(StackProtectReq);
+ HANDLE_ATTR(NoCapture);
+#undef HANDLE_ATTR
+ assert(attrs == 0 && "Unhandled attribute!");
+ Out << ";";
+ nl(Out);
+ Out << "Attrs.push_back(PAWI);";
+ nl(Out);
+ }
+ Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());";
+ nl(Out);
+ out(); nl(Out);
+ Out << '}'; nl(Out);
+ }
+ }
+
+ bool CppWriter::printTypeInternal(const Type* Ty) {
+ // We don't print definitions for primitive types
+ if (Ty->isPrimitiveType() || Ty->isInteger())
+ return false;
+
+ // If we already defined this type, we don't need to define it again.
+ if (DefinedTypes.find(Ty) != DefinedTypes.end())
+ return false;
+
+ // Everything below needs the name for the type so get it now.
+ std::string typeName(getCppName(Ty));
+
+ // Search the type stack for recursion. If we find it, then generate this
+ // as an OpaqueType, but make sure not to do this multiple times because
+ // the type could appear in multiple places on the stack. Once the opaque
+ // definition is issued, it must not be re-issued. Consequently we have to
+ // check the UnresolvedTypes list as well.
+ TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(),
+ Ty);
+ if (TI != TypeStack.end()) {
+ TypeMap::const_iterator I = UnresolvedTypes.find(Ty);
+ if (I == UnresolvedTypes.end()) {
+ Out << "PATypeHolder " << typeName << "_fwd = OpaqueType::get();";
+ nl(Out);
+ UnresolvedTypes[Ty] = typeName;
+ }
+ return true;
+ }
+
+ // We're going to print a derived type which, by definition, contains other
+ // types. So, push this one we're printing onto the type stack to assist with
+ // recursive definitions.
+ TypeStack.push_back(Ty);
+
+ // Print the type definition
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType* FT = cast<FunctionType>(Ty);
+ Out << "std::vector<const Type*>" << typeName << "_args;";
+ nl(Out);
+ FunctionType::param_iterator PI = FT->param_begin();
+ FunctionType::param_iterator PE = FT->param_end();
+ for (; PI != PE; ++PI) {
+ const Type* argTy = static_cast<const Type*>(*PI);
+ bool isForward = printTypeInternal(argTy);
+ std::string argName(getCppName(argTy));
+ Out << typeName << "_args.push_back(" << argName;
+ if (isForward)
+ Out << "_fwd";
+ Out << ");";
+ nl(Out);
+ }
+ bool isForward = printTypeInternal(FT->getReturnType());
+ std::string retTypeName(getCppName(FT->getReturnType()));
+ Out << "FunctionType* " << typeName << " = FunctionType::get(";
+ in(); nl(Out) << "/*Result=*/" << retTypeName;
+ if (isForward)
+ Out << "_fwd";
+ Out << ",";
+ nl(Out) << "/*Params=*/" << typeName << "_args,";
+ nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
+ out();
+ nl(Out);
+ break;
+ }
+ case Type::StructTyID: {
+ const StructType* ST = cast<StructType>(Ty);
+ Out << "std::vector<const Type*>" << typeName << "_fields;";
+ nl(Out);
+ StructType::element_iterator EI = ST->element_begin();
+ StructType::element_iterator EE = ST->element_end();
+ for (; EI != EE; ++EI) {
+ const Type* fieldTy = static_cast<const Type*>(*EI);
+ bool isForward = printTypeInternal(fieldTy);
+ std::string fieldName(getCppName(fieldTy));
+ Out << typeName << "_fields.push_back(" << fieldName;
+ if (isForward)
+ Out << "_fwd";
+ Out << ");";
+ nl(Out);
+ }
+ Out << "StructType* " << typeName << " = StructType::get("
+ << typeName << "_fields, /*isPacked=*/"
+ << (ST->isPacked() ? "true" : "false") << ");";
+ nl(Out);
+ break;
+ }
+ case Type::ArrayTyID: {
+ const ArrayType* AT = cast<ArrayType>(Ty);
+ const Type* ET = AT->getElementType();
+ bool isForward = printTypeInternal(ET);
+ std::string elemName(getCppName(ET));
+ Out << "ArrayType* " << typeName << " = ArrayType::get("
+ << elemName << (isForward ? "_fwd" : "")
+ << ", " << utostr(AT->getNumElements()) << ");";
+ nl(Out);
+ break;
+ }
+ case Type::PointerTyID: {
+ const PointerType* PT = cast<PointerType>(Ty);
+ const Type* ET = PT->getElementType();
+ bool isForward = printTypeInternal(ET);
+ std::string elemName(getCppName(ET));
+ Out << "PointerType* " << typeName << " = PointerType::get("
+ << elemName << (isForward ? "_fwd" : "")
+ << ", " << utostr(PT->getAddressSpace()) << ");";
+ nl(Out);
+ break;
+ }
+ case Type::VectorTyID: {
+ const VectorType* PT = cast<VectorType>(Ty);
+ const Type* ET = PT->getElementType();
+ bool isForward = printTypeInternal(ET);
+ std::string elemName(getCppName(ET));
+ Out << "VectorType* " << typeName << " = VectorType::get("
+ << elemName << (isForward ? "_fwd" : "")
+ << ", " << utostr(PT->getNumElements()) << ");";
+ nl(Out);
+ break;
+ }
+ case Type::OpaqueTyID: {
+ Out << "OpaqueType* " << typeName << " = OpaqueType::get();";
+ nl(Out);
+ break;
+ }
+ default:
+ error("Invalid TypeID");
+ }
+
+ // If the type had a name, make sure we recreate it.
+ const std::string* progTypeName =
+ findTypeName(TheModule->getTypeSymbolTable(),Ty);
+ if (progTypeName) {
+ Out << "mod->addTypeName(\"" << *progTypeName << "\", "
+ << typeName << ");";
+ nl(Out);
+ }
+
+ // Pop us off the type stack
+ TypeStack.pop_back();
+
+ // Indicate that this type is now defined.
+ DefinedTypes.insert(Ty);
+
+ // Early resolve as many unresolved types as possible. Search the unresolved
+ // types map for the type we just printed. Now that its definition is complete
+ // we can resolve any previous references to it. This prevents a cascade of
+ // unresolved types.
+ TypeMap::iterator I = UnresolvedTypes.find(Ty);
+ if (I != UnresolvedTypes.end()) {
+ Out << "cast<OpaqueType>(" << I->second
+ << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");";
+ nl(Out);
+ Out << I->second << " = cast<";
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: Out << "FunctionType"; break;
+ case Type::ArrayTyID: Out << "ArrayType"; break;
+ case Type::StructTyID: Out << "StructType"; break;
+ case Type::VectorTyID: Out << "VectorType"; break;
+ case Type::PointerTyID: Out << "PointerType"; break;
+ case Type::OpaqueTyID: Out << "OpaqueType"; break;
+ default: Out << "NoSuchDerivedType"; break;
+ }
+ Out << ">(" << I->second << "_fwd.get());";
+ nl(Out); nl(Out);
+ UnresolvedTypes.erase(I);
+ }
+
+    // Finally, separate the type definition from others with a newline.
+ nl(Out);
+
+ // We weren't a recursive type
+ return false;
+ }
+
+  // Prints a type definition via printTypeInternal, which returns true when
+  // it could not resolve all the types in the definition and had to use a
+  // forward reference.
+ void CppWriter::printType(const Type* Ty) {
+ assert(TypeStack.empty());
+ TypeStack.clear();
+ printTypeInternal(Ty);
+ assert(TypeStack.empty());
+ }
+
+ void CppWriter::printTypes(const Module* M) {
+ // Walk the symbol table and print out all its types
+ const TypeSymbolTable& symtab = M->getTypeSymbolTable();
+ for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end();
+ TI != TE; ++TI) {
+
+ // For primitive types and types already defined, just add a name
+ TypeMap::const_iterator TNI = TypeNames.find(TI->second);
+ if (TI->second->isInteger() || TI->second->isPrimitiveType() ||
+ TNI != TypeNames.end()) {
+ Out << "mod->addTypeName(\"";
+ printEscapedString(TI->first);
+ Out << "\", " << getCppName(TI->second) << ");";
+ nl(Out);
+ // For everything else, define the type
+ } else {
+ printType(TI->second);
+ }
+ }
+
+    // Print the types of all of the global variables...
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ if (I->hasInitializer())
+ printType(I->getInitializer()->getType());
+ printType(I->getType());
+ }
+
+ // Add all the functions to the table
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ printType(FI->getReturnType());
+ printType(FI->getFunctionType());
+ // Add all the function arguments
+ for (Function::const_arg_iterator AI = FI->arg_begin(),
+ AE = FI->arg_end(); AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ printType(BB->getType());
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ printType(I->getType());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ printType(I->getOperand(i)->getType());
+ }
+ }
+ }
+ }
+
+
+ // printConstant - Print out a constant pool entry...
+ void CppWriter::printConstant(const Constant *CV) {
+ // First, if the constant is actually a GlobalValue (variable or function)
+ // or its already in the constant list then we've printed it already and we
+    // or it's already in the constant list, then we've printed it already and we
+ if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end())
+ return;
+
+ std::string constName(getCppName(CV));
+ std::string typeName(getCppName(CV->getType()));
+
+ if (isa<GlobalValue>(CV)) {
+ // Skip variables and functions, we emit them elsewhere
+ return;
+ }
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ std::string constValue = CI->getValue().toString(10, true);
+ Out << "ConstantInt* " << constName << " = ConstantInt::get(APInt("
+ << cast<IntegerType>(CI->getType())->getBitWidth() << ", \""
+ << constValue << "\", " << constValue.length() << ", 10));";
+ } else if (isa<ConstantAggregateZero>(CV)) {
+ Out << "ConstantAggregateZero* " << constName
+ << " = ConstantAggregateZero::get(" << typeName << ");";
+ } else if (isa<ConstantPointerNull>(CV)) {
+ Out << "ConstantPointerNull* " << constName
+ << " = ConstantPointerNull::get(" << typeName << ");";
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ Out << "ConstantFP* " << constName << " = ";
+ printCFP(CFP);
+ Out << ";";
+ } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ if (CA->isString() && CA->getType()->getElementType() == Type::Int8Ty) {
+ Out << "Constant* " << constName << " = ConstantArray::get(\"";
+ std::string tmp = CA->getAsString();
+ bool nullTerminate = false;
+ if (tmp[tmp.length()-1] == 0) {
+ tmp.erase(tmp.length()-1);
+ nullTerminate = true;
+ }
+ printEscapedString(tmp);
+ // Determine if we want null termination or not.
+ if (nullTerminate)
+          Out << "\", true";   // Indicate that the null terminator should be added.
+        else
+          Out << "\", false";  // No null terminator.
+ Out << ");";
+ } else {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CA->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CA->getOperand(i)); // recurse to print operands
+ Out << constName << "_elems.push_back("
+ << getCppName(CA->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantArray::get("
+ << typeName << ", " << constName << "_elems);";
+ }
+ } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_fields;";
+ nl(Out);
+ unsigned N = CS->getNumOperands();
+ for (unsigned i = 0; i < N; i++) {
+ printConstant(CS->getOperand(i));
+ Out << constName << "_fields.push_back("
+ << getCppName(CS->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantStruct::get("
+ << typeName << ", " << constName << "_fields);";
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CP->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CP->getOperand(i));
+ Out << constName << "_elems.push_back("
+ << getCppName(CP->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantVector::get("
+ << typeName << ", " << constName << "_elems);";
+ } else if (isa<UndefValue>(CV)) {
+ Out << "UndefValue* " << constName << " = UndefValue::get("
+ << typeName << ");";
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Out << "std::vector<Constant*> " << constName << "_indices;";
+ nl(Out);
+ printConstant(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i ) {
+ printConstant(CE->getOperand(i));
+ Out << constName << "_indices.push_back("
+ << getCppName(CE->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName
+ << " = ConstantExpr::getGetElementPtr("
+ << getCppName(CE->getOperand(0)) << ", "
+ << "&" << constName << "_indices[0], "
+ << constName << "_indices.size()"
+ << " );";
+ } else if (CE->isCast()) {
+ printConstant(CE->getOperand(0));
+ Out << "Constant* " << constName << " = ConstantExpr::getCast(";
+ switch (CE->getOpcode()) {
+ default: assert(0 && "Invalid cast opcode");
+ case Instruction::Trunc: Out << "Instruction::Trunc"; break;
+ case Instruction::ZExt: Out << "Instruction::ZExt"; break;
+ case Instruction::SExt: Out << "Instruction::SExt"; break;
+ case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break;
+ case Instruction::FPExt: Out << "Instruction::FPExt"; break;
+ case Instruction::FPToUI: Out << "Instruction::FPToUI"; break;
+ case Instruction::FPToSI: Out << "Instruction::FPToSI"; break;
+ case Instruction::UIToFP: Out << "Instruction::UIToFP"; break;
+ case Instruction::SIToFP: Out << "Instruction::SIToFP"; break;
+ case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break;
+ case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break;
+ case Instruction::BitCast: Out << "Instruction::BitCast"; break;
+ }
+ Out << ", " << getCppName(CE->getOperand(0)) << ", "
+ << getCppName(CE->getType()) << ");";
+ } else {
+ unsigned N = CE->getNumOperands();
+ for (unsigned i = 0; i < N; ++i ) {
+ printConstant(CE->getOperand(i));
+ }
+ Out << "Constant* " << constName << " = ConstantExpr::";
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Out << "getAdd("; break;
+ case Instruction::Sub: Out << "getSub("; break;
+ case Instruction::Mul: Out << "getMul("; break;
+ case Instruction::UDiv: Out << "getUDiv("; break;
+ case Instruction::SDiv: Out << "getSDiv("; break;
+ case Instruction::FDiv: Out << "getFDiv("; break;
+ case Instruction::URem: Out << "getURem("; break;
+ case Instruction::SRem: Out << "getSRem("; break;
+ case Instruction::FRem: Out << "getFRem("; break;
+ case Instruction::And: Out << "getAnd("; break;
+ case Instruction::Or: Out << "getOr("; break;
+ case Instruction::Xor: Out << "getXor("; break;
+ case Instruction::ICmp:
+ Out << "getICmp(ICmpInst::ICMP_";
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "NE"; break;
+ case ICmpInst::ICMP_SLT: Out << "SLT"; break;
+ case ICmpInst::ICMP_ULT: Out << "ULT"; break;
+ case ICmpInst::ICMP_SGT: Out << "SGT"; break;
+ case ICmpInst::ICMP_UGT: Out << "UGT"; break;
+ case ICmpInst::ICMP_SLE: Out << "SLE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ULE"; break;
+ case ICmpInst::ICMP_SGE: Out << "SGE"; break;
+ case ICmpInst::ICMP_UGE: Out << "UGE"; break;
+ default: error("Invalid ICmp Predicate");
+ }
+ break;
+ case Instruction::FCmp:
+ Out << "getFCmp(FCmpInst::FCMP_";
+ switch (CE->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FALSE"; break;
+ case FCmpInst::FCMP_ORD: Out << "ORD"; break;
+ case FCmpInst::FCMP_UNO: Out << "UNO"; break;
+ case FCmpInst::FCMP_OEQ: Out << "OEQ"; break;
+ case FCmpInst::FCMP_UEQ: Out << "UEQ"; break;
+ case FCmpInst::FCMP_ONE: Out << "ONE"; break;
+ case FCmpInst::FCMP_UNE: Out << "UNE"; break;
+ case FCmpInst::FCMP_OLT: Out << "OLT"; break;
+ case FCmpInst::FCMP_ULT: Out << "ULT"; break;
+ case FCmpInst::FCMP_OGT: Out << "OGT"; break;
+ case FCmpInst::FCMP_UGT: Out << "UGT"; break;
+ case FCmpInst::FCMP_OLE: Out << "OLE"; break;
+ case FCmpInst::FCMP_ULE: Out << "ULE"; break;
+ case FCmpInst::FCMP_OGE: Out << "OGE"; break;
+ case FCmpInst::FCMP_UGE: Out << "UGE"; break;
+ case FCmpInst::FCMP_TRUE: Out << "TRUE"; break;
+ default: error("Invalid FCmp Predicate");
+ }
+ break;
+ case Instruction::Shl: Out << "getShl("; break;
+ case Instruction::LShr: Out << "getLShr("; break;
+ case Instruction::AShr: Out << "getAShr("; break;
+ case Instruction::Select: Out << "getSelect("; break;
+ case Instruction::ExtractElement: Out << "getExtractElement("; break;
+ case Instruction::InsertElement: Out << "getInsertElement("; break;
+ case Instruction::ShuffleVector: Out << "getShuffleVector("; break;
+ default:
+ error("Invalid constant expression");
+ break;
+ }
+ Out << getCppName(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i)
+ Out << ", " << getCppName(CE->getOperand(i));
+ Out << ");";
+ }
+ } else {
+ error("Bad Constant");
+ Out << "Constant* " << constName << " = 0; ";
+ }
+ nl(Out);
+ }
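+
+ // Illustration (not part of the original source; names are hypothetical): for
+ // a 32-bit ConstantInt with value 42, the code above would emit roughly
+ //   ConstantInt* const_int32_42 = ConstantInt::get(APInt(32, "42", 2, 10));
+ // and for a nul-terminated i8 array holding "hi" it would emit
+ //   Constant* const_array_hi = ConstantArray::get("hi", true);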
+
+ void CppWriter::printConstants(const Module* M) {
+ // Traverse all the global variables looking for constant initializers
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I)
+ if (I->hasInitializer())
+ printConstant(I->getInitializer());
+
+ // Traverse the LLVM functions looking for constants
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ // Walk the basic blocks and instructions looking for constant operands
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) {
+ printConstant(C);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ void CppWriter::printVariableUses(const GlobalVariable *GV) {
+ nl(Out) << "// Type Definitions";
+ nl(Out);
+ printType(GV->getType());
+ if (GV->hasInitializer()) {
+ Constant* Init = GV->getInitializer();
+ printType(Init->getType());
+ if (Function* F = dyn_cast<Function>(Init)) {
+ nl(Out) << "// Function Declarations"; nl(Out);
+ printFunctionHead(F);
+ } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ printVariableHead(gv);
+ } else {
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstant(Init);
+ }
+ if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ printVariableBody(gv);
+ }
+ }
+ }
+
+ void CppWriter::printVariableHead(const GlobalVariable *GV) {
+ nl(Out) << "GlobalVariable* " << getCppName(GV);
+ if (is_inline) {
+ Out << " = mod->getGlobalVariable(";
+ printEscapedString(GV->getName());
+ Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
+ nl(Out) << "if (!" << getCppName(GV) << ") {";
+ in(); nl(Out) << getCppName(GV);
+ }
+ Out << " = new GlobalVariable(";
+ nl(Out) << "/*Type=*/";
+ printCppName(GV->getType()->getElementType());
+ Out << ",";
+ nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false");
+ Out << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(GV->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Initializer=*/0, ";
+ if (GV->hasInitializer()) {
+ Out << "// has initializer, specified below";
+ }
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(GV->getName());
+ Out << "\",";
+ nl(Out) << "mod);";
+ nl(Out);
+
+ if (GV->hasSection()) {
+ printCppName(GV);
+ Out << "->setSection(\"";
+ printEscapedString(GV->getSection());
+ Out << "\");";
+ nl(Out);
+ }
+ if (GV->getAlignment()) {
+ printCppName(GV);
+ Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
+ nl(Out);
+ }
+ if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(GV);
+ Out << "->setVisibility(";
+ printVisibilityType(GV->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (is_inline) {
+ out(); Out << "}"; nl(Out);
+ }
+ }
+
+ void CppWriter::printVariableBody(const GlobalVariable *GV) {
+ if (GV->hasInitializer()) {
+ printCppName(GV);
+ Out << "->setInitializer(";
+ Out << getCppName(GV->getInitializer()) << ");";
+ nl(Out);
+ }
+ }
+
+ std::string CppWriter::getOpName(Value* V) {
+ if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
+ return getCppName(V);
+
+ // See if it's already in the map of forward references; if so, just return
+ // the name we already set up for it.
+ ForwardRefMap::const_iterator I = ForwardRefs.find(V);
+ if (I != ForwardRefs.end())
+ return I->second;
+
+ // This is a new forward reference. Generate a unique name for it
+ std::string result(std::string("fwdref_") + utostr(uniqueNum++));
+
+ // Yes, this is a hack. An Argument is the smallest instantiable value that
+ // we can make as a placeholder for the real value. We'll replace these
+ // Argument instances later.
+ Out << "Argument* " << result << " = new Argument("
+ << getCppName(V->getType()) << ");";
+ nl(Out);
+ ForwardRefs[V] = result;
+ return result;
+ }
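+
+ // Illustration (not part of the original source; names are hypothetical): for
+ // an i32 value that has not been defined yet, the code above would emit a
+ // placeholder such as
+ //   Argument* fwdref_7 = new Argument(IntTy_32);
+ // and printFunctionBody() later resolves it with
+ //   fwdref_7->replaceAllUsesWith(inst_x); delete fwdref_7;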
+
+ // printInstruction - This member is called for each Instruction in a function.
+ void CppWriter::printInstruction(const Instruction *I,
+ const std::string& bbname) {
+ std::string iName(getCppName(I));
+
+ // Before we emit this instruction, we need to take care of generating any
+ // forward references. So, we get the names of all the operands in advance
+ std::string* opNames = new std::string[I->getNumOperands()];
+ for (unsigned i = 0; i < I->getNumOperands(); i++) {
+ opNames[i] = getOpName(I->getOperand(i));
+ }
+
+ switch (I->getOpcode()) {
+ default:
+ error("Invalid instruction");
+ break;
+
+ case Instruction::Ret: {
+ const ReturnInst* ret = cast<ReturnInst>(I);
+ Out << "ReturnInst::Create("
+ << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
+ break;
+ }
+ case Instruction::Br: {
+ const BranchInst* br = cast<BranchInst>(I);
+ Out << "BranchInst::Create(" ;
+ if (br->getNumOperands() == 3 ) {
+ Out << opNames[2] << ", "
+ << opNames[1] << ", "
+ << opNames[0] << ", ";
+
+ } else if (br->getNumOperands() == 1) {
+ Out << opNames[0] << ", ";
+ } else {
+ error("Branch with 2 operands?");
+ }
+ Out << bbname << ");";
+ break;
+ }
+ case Instruction::Switch: {
+ const SwitchInst* sw = cast<SwitchInst>(I);
+ Out << "SwitchInst* " << iName << " = SwitchInst::Create("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << sw->getNumCases() << ", " << bbname << ");";
+ nl(Out);
+ for (unsigned i = 2; i < sw->getNumOperands(); i += 2 ) {
+ Out << iName << "->addCase("
+ << opNames[i] << ", "
+ << opNames[i+1] << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::Invoke: {
+ const InvokeInst* inv = cast<InvokeInst>(I);
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 3; i < inv->getNumOperands(); ++i) {
+ Out << iName << "_params.push_back("
+ << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "InvokeInst *" << iName << " = InvokeInst::Create("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << opNames[2] << ", "
+ << iName << "_params.begin(), " << iName << "_params.end(), \"";
+ printEscapedString(inv->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(inv->getCallingConv());
+ Out << ");";
+ printAttributes(inv->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Unwind: {
+ Out << "new UnwindInst("
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Unreachable:{
+ Out << "new UnreachableInst("
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:{
+ Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
+ switch (I->getOpcode()) {
+ case Instruction::Add: Out << "Instruction::Add"; break;
+ case Instruction::Sub: Out << "Instruction::Sub"; break;
+ case Instruction::Mul: Out << "Instruction::Mul"; break;
+ case Instruction::UDiv:Out << "Instruction::UDiv"; break;
+ case Instruction::SDiv:Out << "Instruction::SDiv"; break;
+ case Instruction::FDiv:Out << "Instruction::FDiv"; break;
+ case Instruction::URem:Out << "Instruction::URem"; break;
+ case Instruction::SRem:Out << "Instruction::SRem"; break;
+ case Instruction::FRem:Out << "Instruction::FRem"; break;
+ case Instruction::And: Out << "Instruction::And"; break;
+ case Instruction::Or: Out << "Instruction::Or"; break;
+ case Instruction::Xor: Out << "Instruction::Xor"; break;
+ case Instruction::Shl: Out << "Instruction::Shl"; break;
+ case Instruction::LShr:Out << "Instruction::LShr"; break;
+ case Instruction::AShr:Out << "Instruction::AShr"; break;
+ default: Out << "Instruction::BadOpCode"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::FCmp: {
+ Out << "FCmpInst* " << iName << " = new FCmpInst(";
+ switch (cast<FCmpInst>(I)->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break;
+ case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break;
+ case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break;
+ case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break;
+ case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break;
+ case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break;
+ case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break;
+ case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break;
+ case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break;
+ case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break;
+ case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break;
+ case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break;
+ case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break;
+ case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break;
+ case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break;
+ case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break;
+ default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ICmp: {
+ Out << "ICmpInst* " << iName << " = new ICmpInst(";
+ switch (cast<ICmpInst>(I)->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break;
+ case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break;
+ case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break;
+ case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break;
+ case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break;
+ case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break;
+ case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break;
+ case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break;
+ default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::Malloc: {
+ const MallocInst* mallocI = cast<MallocInst>(I);
+ Out << "MallocInst* " << iName << " = new MallocInst("
+ << getCppName(mallocI->getAllocatedType()) << ", ";
+ if (mallocI->isArrayAllocation())
+ Out << opNames[0] << ", " ;
+ Out << "\"";
+ printEscapedString(mallocI->getName());
+ Out << "\", " << bbname << ");";
+ if (mallocI->getAlignment())
+ nl(Out) << iName << "->setAlignment("
+ << mallocI->getAlignment() << ");";
+ break;
+ }
+ case Instruction::Free: {
+ Out << "FreeInst* " << iName << " = new FreeInst("
+ << getCppName(I->getOperand(0)) << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst* allocaI = cast<AllocaInst>(I);
+ Out << "AllocaInst* " << iName << " = new AllocaInst("
+ << getCppName(allocaI->getAllocatedType()) << ", ";
+ if (allocaI->isArrayAllocation())
+ Out << opNames[0] << ", ";
+ Out << "\"";
+ printEscapedString(allocaI->getName());
+ Out << "\", " << bbname << ");";
+ if (allocaI->getAlignment())
+ nl(Out) << iName << "->setAlignment("
+ << allocaI->getAlignment() << ");";
+ break;
+ }
+ case Instruction::Load:{
+ const LoadInst* load = cast<LoadInst>(I);
+ Out << "LoadInst* " << iName << " = new LoadInst("
+ << opNames[0] << ", \"";
+ printEscapedString(load->getName());
+ Out << "\", " << (load->isVolatile() ? "true" : "false" )
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::Store: {
+ const StoreInst* store = cast<StoreInst>(I);
+ Out << " new StoreInst("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << (store->isVolatile() ? "true" : "false")
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
+ if (gep->getNumOperands() <= 2) {
+ Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0];
+ if (gep->getNumOperands() == 2)
+ Out << ", " << opNames[1];
+ } else {
+ Out << "std::vector<Value*> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
+ Out << iName << "_indices.push_back("
+ << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0] << ", " << iName << "_indices.begin(), "
+ << iName << "_indices.end()";
+ }
+ Out << ", \"";
+ printEscapedString(gep->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::PHI: {
+ const PHINode* phi = cast<PHINode>(I);
+
+ Out << "PHINode* " << iName << " = PHINode::Create("
+ << getCppName(phi->getType()) << ", \"";
+ printEscapedString(phi->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->reserveOperandSpace("
+ << phi->getNumIncomingValues()
+ << ");";
+ nl(Out);
+ for (unsigned i = 0; i < phi->getNumOperands(); i+=2) {
+ Out << iName << "->addIncoming("
+ << opNames[i] << ", " << opNames[i+1] << ");";
+ nl(Out);
+ }
+ break;
+ }
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast: {
+ const CastInst* cst = cast<CastInst>(I);
+ Out << "CastInst* " << iName << " = new ";
+ switch (I->getOpcode()) {
+ case Instruction::Trunc: Out << "TruncInst"; break;
+ case Instruction::ZExt: Out << "ZExtInst"; break;
+ case Instruction::SExt: Out << "SExtInst"; break;
+ case Instruction::FPTrunc: Out << "FPTruncInst"; break;
+ case Instruction::FPExt: Out << "FPExtInst"; break;
+ case Instruction::FPToUI: Out << "FPToUIInst"; break;
+ case Instruction::FPToSI: Out << "FPToSIInst"; break;
+ case Instruction::UIToFP: Out << "UIToFPInst"; break;
+ case Instruction::SIToFP: Out << "SIToFPInst"; break;
+ case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
+ case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
+ case Instruction::BitCast: Out << "BitCastInst"; break;
+ default: assert(!"Unreachable"); break;
+ }
+ Out << "(" << opNames[0] << ", "
+ << getCppName(cst->getType()) << ", \"";
+ printEscapedString(cst->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::Call:{
+ const CallInst* call = cast<CallInst>(I);
+ if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) {
+ Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get("
+ << getCppName(ila->getFunctionType()) << ", \""
+ << ila->getAsmString() << "\", \""
+ << ila->getConstraintString() << "\","
+ << (ila->hasSideEffects() ? "true" : "false") << ");";
+ nl(Out);
+ }
+ if (call->getNumOperands() > 2) {
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 1; i < call->getNumOperands(); ++i) {
+ Out << iName << "_params.push_back(" << opNames[i] << ");";
+ nl(Out);
+ }
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[0] << ", " << iName << "_params.begin(), "
+ << iName << "_params.end(), \"";
+ } else if (call->getNumOperands() == 2) {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[0] << ", " << opNames[1] << ", \"";
+ } else {
+ Out << "CallInst* " << iName << " = CallInst::Create(" << opNames[0]
+ << ", \"";
+ }
+ printEscapedString(call->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(call->getCallingConv());
+ Out << ");";
+ nl(Out) << iName << "->setTailCall("
+ << (call->isTailCall() ? "true":"false");
+ Out << ");";
+ printAttributes(call->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Select: {
+ const SelectInst* sel = cast<SelectInst>(I);
+ Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create(";
+ Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(sel->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::UserOp1:
+ /// FALL THROUGH
+ case Instruction::UserOp2: {
+ /// FIXME: What should be done here?
+ break;
+ }
+ case Instruction::VAArg: {
+ const VAArgInst* va = cast<VAArgInst>(I);
+ Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst("
+ << opNames[0] << ", " << getCppName(va->getType()) << ", \"";
+ printEscapedString(va->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractElement: {
+ const ExtractElementInst* eei = cast<ExtractElementInst>(I);
+ Out << "ExtractElementInst* " << getCppName(eei)
+ << " = new ExtractElementInst(" << opNames[0]
+ << ", " << opNames[1] << ", \"";
+ printEscapedString(eei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertElement: {
+ const InsertElementInst* iei = cast<InsertElementInst>(I);
+ Out << "InsertElementInst* " << getCppName(iei)
+ << " = InsertElementInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(iei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I);
+ Out << "ShuffleVectorInst* " << getCppName(svi)
+ << " = new ShuffleVectorInst(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(svi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractValue: {
+ const ExtractValueInst *evi = cast<ExtractValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < evi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << evi->idx_begin()[i] << ");";
+ nl(Out);
+ }
+ Out << "ExtractValueInst* " << getCppName(evi)
+ << " = ExtractValueInst::Create(" << opNames[0]
+ << ", "
+ << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ printEscapedString(evi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertValue: {
+ const InsertValueInst *ivi = cast<InsertValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < ivi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << ivi->idx_begin()[i] << ");";
+ nl(Out);
+ }
+ Out << "InsertValueInst* " << getCppName(ivi)
+ << " = InsertValueInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", "
+ << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ printEscapedString(ivi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ }
+ DefinedValues.insert(I);
+ nl(Out);
+ delete [] opNames;
+}
+
+ // Print out the types, constants and declarations needed by one function
+ void CppWriter::printFunctionUses(const Function* F) {
+ nl(Out) << "// Type Definitions"; nl(Out);
+ if (!is_inline) {
+ // Print the function's return type
+ printType(F->getReturnType());
+
+ // Print the function's function type
+ printType(F->getFunctionType());
+
+ // Print the types of each of the function's arguments
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+ }
+
+ // Print type definitions for every type referenced by an instruction and
+ // make a note of any global values or constants that are referenced
+ SmallPtrSet<GlobalValue*,64> gvs;
+ SmallPtrSet<Constant*,64> consts;
+ for (Function::const_iterator BB = F->begin(), BE = F->end();
+ BB != BE; ++BB){
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Print the type of the instruction itself
+ printType(I->getType());
+
+ // Print the type of each of the instruction's operands
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value* operand = I->getOperand(i);
+ printType(operand->getType());
+
+ // If the operand references a GVal or Constant, make a note of it
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+ gvs.insert(GV);
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ } else if (Constant* C = dyn_cast<Constant>(operand))
+ consts.insert(C);
+ }
+ }
+ }
+
+ // Print the function declarations for any functions encountered
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (Function* Fun = dyn_cast<Function>(*I)) {
+ if (!is_inline || Fun != F)
+ printFunctionHead(Fun);
+ }
+ }
+
+ // Print the global variable declarations for any variables encountered
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
+ printVariableHead(F);
+ }
+
+ // Print the constants found
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
+ E = consts.end(); I != E; ++I) {
+ printConstant(*I);
+ }
+
+ // Process the global variable definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
+ printVariableBody(GV);
+ }
+ }
+
+ void CppWriter::printFunctionHead(const Function* F) {
+ nl(Out) << "Function* " << getCppName(F);
+ if (is_inline) {
+ Out << " = mod->getFunction(\"";
+ printEscapedString(F->getName());
+ Out << "\", " << getCppName(F->getFunctionType()) << ");";
+ nl(Out) << "if (!" << getCppName(F) << ") {";
+ nl(Out) << getCppName(F);
+ }
+ Out<< " = Function::Create(";
+ nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(F->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(F->getName());
+ Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : "");
+ nl(Out,-1);
+ printCppName(F);
+ Out << "->setCallingConv(";
+ printCallingConv(F->getCallingConv());
+ Out << ");";
+ nl(Out);
+ if (F->hasSection()) {
+ printCppName(F);
+ Out << "->setSection(\"" << F->getSection() << "\");";
+ nl(Out);
+ }
+ if (F->getAlignment()) {
+ printCppName(F);
+ Out << "->setAlignment(" << F->getAlignment() << ");";
+ nl(Out);
+ }
+ if (F->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(F);
+ Out << "->setVisibility(";
+ printVisibilityType(F->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (F->hasGC()) {
+ printCppName(F);
+ Out << "->setGC(\"" << F->getGC() << "\");";
+ nl(Out);
+ }
+ if (is_inline) {
+ Out << "}";
+ nl(Out);
+ }
+ printAttributes(F->getAttributes(), getCppName(F));
+ printCppName(F);
+ Out << "->setAttributes(" << getCppName(F) << "_PAL);";
+ nl(Out);
+ }
+
+ void CppWriter::printFunctionBody(const Function *F) {
+ if (F->isDeclaration())
+ return; // external functions have no bodies.
+
+ // Clear the DefinedValues and ForwardRefs maps because we can't have
+ // cross-function forward refs
+ ForwardRefs.clear();
+ DefinedValues.clear();
+
+ // Create all the argument values
+ if (!is_inline) {
+ if (!F->arg_empty()) {
+ Out << "Function::arg_iterator args = " << getCppName(F)
+ << "->arg_begin();";
+ nl(Out);
+ }
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << "Value* " << getCppName(AI) << " = args++;";
+ nl(Out);
+ if (AI->hasName()) {
+ Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");";
+ nl(Out);
+ }
+ }
+ }
+
+ // Create all the basic blocks
+ nl(Out);
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ Out << "BasicBlock* " << bbname << " = BasicBlock::Create(\"";
+ if (BI->hasName())
+ printEscapedString(BI->getName());
+ Out << "\"," << getCppName(BI->getParent()) << ",0);";
+ nl(Out);
+ }
+
+ // Output all of the function's basic blocks.
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+ nl(Out);
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
+ I != E; ++I) {
+ printInstruction(I,bbname);
+ }
+ }
+
+ // Loop over the ForwardRefs and resolve them now that all instructions
+ // are generated.
+ if (!ForwardRefs.empty()) {
+ nl(Out) << "// Resolve Forward References";
+ nl(Out);
+ }
+
+ while (!ForwardRefs.empty()) {
+ ForwardRefMap::iterator I = ForwardRefs.begin();
+ Out << I->second << "->replaceAllUsesWith("
+ << getCppName(I->first) << "); delete " << I->second << ";";
+ nl(Out);
+ ForwardRefs.erase(I);
+ }
+ }
+
+ void CppWriter::printInline(const std::string& fname,
+ const std::string& func) {
+ const Function* F = TheModule->getFunction(func);
+ if (!F) {
+ error(std::string("Function '") + func + "' not found in input module");
+ return;
+ }
+ if (F->isDeclaration()) {
+ error(std::string("Function '") + func + "' is external!");
+ return;
+ }
+ nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *"
+ << getCppName(F);
+ unsigned arg_count = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << ", Value* arg_" << arg_count;
+ }
+ Out << ") {";
+ nl(Out);
+ is_inline = true;
+ printFunctionUses(F);
+ printFunctionBody(F);
+ is_inline = false;
+ Out << "return " << getCppName(F->begin()) << ";";
+ nl(Out) << "}";
+ nl(Out);
+ }
+
+ void CppWriter::printModuleBody() {
+ // Print out all the type definitions
+ nl(Out) << "// Type Definitions"; nl(Out);
+ printTypes(TheModule);
+
+ // Functions can call each other and global variables can reference them, so
+ // declare all the functions first before emitting their bodies.
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I)
+ printFunctionHead(I);
+
+ // Process the global variable declarations. We can't initialize them until
+ // after the constants are printed, so just print a header for each global.
+ nl(Out) << "// Global Variable Declarations\n"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableHead(I);
+ }
+
+ // Print out all the constant definitions. Constants don't recurse except
+ // through GlobalValues. All GlobalValues have been declared at this point
+ // so we can proceed to generate the constants.
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstants(TheModule);
+
+ // Process the global variable definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableBody(I);
+ }
+
+ // Finally, we can safely put out all of the function bodies.
+ nl(Out) << "// Function Definitions"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I) {
+ if (!I->isDeclaration()) {
+ nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
+ << ")";
+ nl(Out) << "{";
+ nl(Out,1);
+ printFunctionBody(I);
+ nl(Out,-1) << "}";
+ nl(Out);
+ }
+ }
+ }
+
+ void CppWriter::printProgram(const std::string& fname,
+ const std::string& mName) {
+ Out << "#include <llvm/Module.h>\n";
+ Out << "#include <llvm/DerivedTypes.h>\n";
+ Out << "#include <llvm/Constants.h>\n";
+ Out << "#include <llvm/GlobalVariable.h>\n";
+ Out << "#include <llvm/Function.h>\n";
+ Out << "#include <llvm/CallingConv.h>\n";
+ Out << "#include <llvm/BasicBlock.h>\n";
+ Out << "#include <llvm/Instructions.h>\n";
+ Out << "#include <llvm/InlineAsm.h>\n";
+ Out << "#include <llvm/Support/MathExtras.h>\n";
+ Out << "#include <llvm/Support/raw_ostream.h>\n";
+ Out << "#include <llvm/Pass.h>\n";
+ Out << "#include <llvm/PassManager.h>\n";
+ Out << "#include <llvm/ADT/SmallVector.h>\n";
+ Out << "#include <llvm/Analysis/Verifier.h>\n";
+ Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
+ Out << "#include <algorithm>\n";
+ Out << "using namespace llvm;\n\n";
+ Out << "Module* " << fname << "();\n\n";
+ Out << "int main(int argc, char**argv) {\n";
+ Out << " Module* Mod = " << fname << "();\n";
+ Out << " verifyModule(*Mod, PrintMessageAction);\n";
+ Out << " outs().flush();\n";
+ Out << " PassManager PM;\n";
+ Out << " PM.add(createPrintModulePass(&outs()));\n";
+ Out << " PM.run(*Mod);\n";
+ Out << " return 0;\n";
+ Out << "}\n\n";
+ printModule(fname,mName);
+ }
+
+ void CppWriter::printModule(const std::string& fname,
+ const std::string& mName) {
+ nl(Out) << "Module* " << fname << "() {";
+ nl(Out,1) << "// Module Construction";
+ nl(Out) << "Module* mod = new Module(\"" << mName << "\");";
+ if (!TheModule->getDataLayout().empty()) {
+ nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
+ }
+ if (!TheModule->getTargetTriple().empty()) {
+ nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
+ << "\");";
+ }
+
+ if (!TheModule->getModuleInlineAsm().empty()) {
+ nl(Out) << "mod->setModuleInlineAsm(\"";
+ printEscapedString(TheModule->getModuleInlineAsm());
+ Out << "\");";
+ }
+ nl(Out);
+
+ // Loop over the dependent libraries and emit them.
+ Module::lib_iterator LI = TheModule->lib_begin();
+ Module::lib_iterator LE = TheModule->lib_end();
+ while (LI != LE) {
+ Out << "mod->addLibrary(\"" << *LI << "\");";
+ nl(Out);
+ ++LI;
+ }
+ printModuleBody();
+ nl(Out) << "return mod;";
+ nl(Out,-1) << "}";
+ nl(Out);
+ }
+
+ void CppWriter::printContents(const std::string& fname,
+ const std::string& mName) {
+ Out << "\nModule* " << fname << "(Module *mod) {\n";
+ Out << "\nmod->setModuleIdentifier(\"" << mName << "\");\n";
+ printModuleBody();
+ Out << "\nreturn mod;\n";
+ Out << "\n}\n";
+ }
+
+ void CppWriter::printFunction(const std::string& fname,
+ const std::string& funcName) {
+ const Function* F = TheModule->getFunction(funcName);
+ if (!F) {
+ error(std::string("Function '") + funcName + "' not found in input module");
+ return;
+ }
+ Out << "\nFunction* " << fname << "(Module *mod) {\n";
+ printFunctionUses(F);
+ printFunctionHead(F);
+ printFunctionBody(F);
+ Out << "return " << getCppName(F) << ";\n";
+ Out << "}\n";
+ }
+
+ void CppWriter::printFunctions() {
+ const Module::FunctionListType &funcs = TheModule->getFunctionList();
+ Module::const_iterator I = funcs.begin();
+ Module::const_iterator IE = funcs.end();
+
+ for (; I != IE; ++I) {
+ const Function &func = *I;
+ if (!func.isDeclaration()) {
+ std::string name("define_");
+ name += func.getName();
+ printFunction(name, func.getName());
+ }
+ }
+ }
+
+ void CppWriter::printVariable(const std::string& fname,
+ const std::string& varName) {
+ const GlobalVariable* GV = TheModule->getNamedGlobal(varName);
+
+ if (!GV) {
+ error(std::string("Variable '") + varName + "' not found in input module");
+ return;
+ }
+ Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n";
+ printVariableUses(GV);
+ printVariableHead(GV);
+ printVariableBody(GV);
+ Out << "return " << getCppName(GV) << ";\n";
+ Out << "}\n";
+ }
+
+ void CppWriter::printType(const std::string& fname,
+ const std::string& typeName) {
+ const Type* Ty = TheModule->getTypeByName(typeName);
+ if (!Ty) {
+ error(std::string("Type '") + typeName + "' not found in input module");
+ return;
+ }
+ Out << "\nType* " << fname << "(Module *mod) {\n";
+ printType(Ty);
+ Out << "return " << getCppName(Ty) << ";\n";
+ Out << "}\n";
+ }
+
+ bool CppWriter::runOnModule(Module &M) {
+ TheModule = &M;
+
+ // Emit a header
+ Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n";
+
+ // Get the name of the function we're supposed to generate
+ std::string fname = FuncName.getValue();
+
+ // Get the name of the thing we are to generate
+ std::string tgtname = NameToGenerate.getValue();
+ if (GenerationType == GenModule ||
+ GenerationType == GenContents ||
+ GenerationType == GenProgram ||
+ GenerationType == GenFunctions) {
+ if (tgtname == "!bad!") {
+ if (M.getModuleIdentifier() == "-")
+ tgtname = "<stdin>";
+ else
+ tgtname = M.getModuleIdentifier();
+ }
+ } else if (tgtname == "!bad!")
+ error("You must use the -for option with -gen-{function,variable,type}");
+
+ switch (WhatToGenerate(GenerationType)) {
+ case GenProgram:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printProgram(fname,tgtname);
+ break;
+ case GenModule:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printModule(fname,tgtname);
+ break;
+ case GenContents:
+ if (fname.empty())
+ fname = "makeLLVMModuleContents";
+ printContents(fname,tgtname);
+ break;
+ case GenFunction:
+ if (fname.empty())
+ fname = "makeLLVMFunction";
+ printFunction(fname,tgtname);
+ break;
+ case GenFunctions:
+ printFunctions();
+ break;
+ case GenInline:
+ if (fname.empty())
+ fname = "makeLLVMInline";
+ printInline(fname,tgtname);
+ break;
+ case GenVariable:
+ if (fname.empty())
+ fname = "makeLLVMVariable";
+ printVariable(fname,tgtname);
+ break;
+ case GenType:
+ if (fname.empty())
+ fname = "makeLLVMType";
+ printType(fname,tgtname);
+ break;
+ default:
+ error("Invalid generation option");
+ }
+
+ return false;
+ }
+}
+
+char CppWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
+ raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel) {
+ if (FileType != TargetMachine::AssemblyFile) return true;
+ PM.add(new CppWriter(o));
+ return false;
+}
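+
+// Usage note (not part of the original source): with this backend registered,
+// the writer is typically reached through llc, e.g.
+//   llc -march=cpp input.bc -o output.cpp
+// where the -gen-* and -for options mentioned above select what gets generated.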
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
new file mode 100644
index 000000000000..db4bc0e722c8
--- /dev/null
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -0,0 +1,44 @@
+//===-- CPPTargetMachine.h - TargetMachine for the C++ backend --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetMachine that is used by the C++ backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CPPTARGETMACHINE_H
+#define CPPTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+class raw_ostream;
+
+struct CPPTargetMachine : public TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+
+ CPPTargetMachine(const Module &M, const std::string &FS)
+ : DataLayout(&M) {}
+
+ virtual bool WantsWholeFile() const { return true; }
+ virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel);
+
+ // This class always works, but shouldn't be the default in most cases.
+ static unsigned getModuleMatchQuality(const Module &M) { return 1; }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+};
+
+} // End llvm namespace
+
+
+#endif
diff --git a/lib/Target/CppBackend/Makefile b/lib/Target/CppBackend/Makefile
new file mode 100644
index 000000000000..ca7e1a82c808
--- /dev/null
+++ b/lib/Target/CppBackend/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Target/CppBackend/Makefile --- ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMCppBackend
+include $(LEVEL)/Makefile.common
+
+CompileCommonOpts += -Wno-format
diff --git a/lib/Target/DarwinTargetAsmInfo.cpp b/lib/Target/DarwinTargetAsmInfo.cpp
new file mode 100644
index 000000000000..05d235177642
--- /dev/null
+++ b/lib/Target/DarwinTargetAsmInfo.cpp
@@ -0,0 +1,169 @@
+//===-- DarwinTargetAsmInfo.cpp - Darwin asm properties ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take in general on Darwin-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Target/DarwinTargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+using namespace llvm;
+
+DarwinTargetAsmInfo::DarwinTargetAsmInfo(const TargetMachine &TM)
+ : TargetAsmInfo(TM) {
+
+ CStringSection_ = getUnnamedSection("\t.cstring",
+ SectionFlags::Mergeable | SectionFlags::Strings);
+ FourByteConstantSection = getUnnamedSection("\t.literal4\n",
+ SectionFlags::Mergeable);
+ EightByteConstantSection = getUnnamedSection("\t.literal8\n",
+ SectionFlags::Mergeable);
+
+ // Note: the 16-byte constant section is subtarget-specific and should be
+ // provided there, if needed.
+ SixteenByteConstantSection = 0;
+
+ ReadOnlySection = getUnnamedSection("\t.const\n", SectionFlags::None);
+
+ TextCoalSection =
+ getNamedSection("\t__TEXT,__textcoal_nt,coalesced,pure_instructions",
+ SectionFlags::Code);
+ ConstTextCoalSection = getNamedSection("\t__TEXT,__const_coal,coalesced",
+ SectionFlags::None);
+ ConstDataCoalSection = getNamedSection("\t__DATA,__const_coal,coalesced",
+ SectionFlags::None);
+ ConstDataSection = getUnnamedSection(".const_data", SectionFlags::None);
+ DataCoalSection = getNamedSection("\t__DATA,__datacoal_nt,coalesced",
+ SectionFlags::Writeable);
+}
+
+/// emitUsedDirectiveFor - On Darwin, internally linked data beginning with
+/// the PrivateGlobalPrefix or the LessPrivateGlobalPrefix does not have the
+/// directive emitted (this occurs in ObjC metadata).
+
+bool
+DarwinTargetAsmInfo::emitUsedDirectiveFor(const GlobalValue* GV,
+ Mangler *Mang) const {
+ if (GV==0)
+ return false;
+ if (GV->hasLocalLinkage() && !isa<Function>(GV) &&
+ ((strlen(getPrivateGlobalPrefix()) != 0 &&
+ Mang->getValueName(GV).substr(0,strlen(getPrivateGlobalPrefix())) ==
+ getPrivateGlobalPrefix()) ||
+ (strlen(getLessPrivateGlobalPrefix()) != 0 &&
+ Mang->getValueName(GV).substr(0,strlen(getLessPrivateGlobalPrefix())) ==
+ getLessPrivateGlobalPrefix())))
+ return false;
+ return true;
+}
+
+const Section*
+DarwinTargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind Kind = SectionKindForGlobal(GV);
+ bool isWeak = GV->isWeakForLinker();
+ bool isNonStatic = TM.getRelocationModel() != Reloc::Static;
+
+ switch (Kind) {
+ case SectionKind::Text:
+ if (isWeak)
+ return TextCoalSection;
+ else
+ return TextSection;
+ case SectionKind::Data:
+ case SectionKind::ThreadData:
+ case SectionKind::BSS:
+ case SectionKind::ThreadBSS:
+ if (cast<GlobalVariable>(GV)->isConstant())
+ return (isWeak ? ConstDataCoalSection : ConstDataSection);
+ else
+ return (isWeak ? DataCoalSection : DataSection);
+ case SectionKind::ROData:
+ return (isWeak ? ConstDataCoalSection :
+ (isNonStatic ? ConstDataSection : getReadOnlySection()));
+ case SectionKind::RODataMergeStr:
+ return (isWeak ?
+ ConstTextCoalSection :
+ MergeableStringSection(cast<GlobalVariable>(GV)));
+ case SectionKind::RODataMergeConst:
+ return (isWeak ?
+ ConstDataCoalSection:
+ MergeableConstSection(cast<GlobalVariable>(GV)));
+ default:
+ assert(0 && "Unsuported section kind for global");
+ }
+
+ // FIXME: Do we have any extra special weird cases?
+ return NULL;
+}
+
+const Section*
+DarwinTargetAsmInfo::MergeableStringSection(const GlobalVariable *GV) const {
+ const TargetData *TD = TM.getTargetData();
+ Constant *C = cast<GlobalVariable>(GV)->getInitializer();
+ const Type *Ty = cast<ArrayType>(C->getType())->getElementType();
+
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ if (Size) {
+ unsigned Align = TD->getPreferredAlignment(GV);
+ if (Align <= 32)
+ return getCStringSection_();
+ }
+
+ return getReadOnlySection();
+}
+
+const Section*
+DarwinTargetAsmInfo::MergeableConstSection(const GlobalVariable *GV) const {
+ Constant *C = GV->getInitializer();
+
+ return MergeableConstSection(C->getType());
+}
+
+inline const Section*
+DarwinTargetAsmInfo::MergeableConstSection(const Type *Ty) const {
+ const TargetData *TD = TM.getTargetData();
+
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ if (Size == 4)
+ return FourByteConstantSection;
+ else if (Size == 8)
+ return EightByteConstantSection;
+ else if (Size == 16 && SixteenByteConstantSection)
+ return SixteenByteConstantSection;
+
+ return getReadOnlySection();
+}
+
+const Section*
+DarwinTargetAsmInfo::SelectSectionForMachineConst(const Type *Ty) const {
+ const Section* S = MergeableConstSection(Ty);
+
+ // Handle weird special case, when compiling PIC stuff.
+ if (S == getReadOnlySection() &&
+ TM.getRelocationModel() != Reloc::Static)
+ return ConstDataSection;
+
+ return S;
+}
+
+std::string
+DarwinTargetAsmInfo::UniqueSectionForGlobal(const GlobalValue* GV,
+ SectionKind::Kind kind) const {
+ assert(0 && "Darwin does not use unique sections");
+ return "";
+}
diff --git a/lib/Target/ELFTargetAsmInfo.cpp b/lib/Target/ELFTargetAsmInfo.cpp
new file mode 100644
index 000000000000..8f6e96e2751d
--- /dev/null
+++ b/lib/Target/ELFTargetAsmInfo.cpp
@@ -0,0 +1,227 @@
+//===-- ELFTargetAsmInfo.cpp - ELF asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take in general on ELF-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+using namespace llvm;
+
+ELFTargetAsmInfo::ELFTargetAsmInfo(const TargetMachine &TM)
+ : TargetAsmInfo(TM) {
+
+ BSSSection_ = getUnnamedSection("\t.bss",
+ SectionFlags::Writeable | SectionFlags::BSS);
+ ReadOnlySection = getNamedSection("\t.rodata", SectionFlags::None);
+ TLSDataSection = getNamedSection("\t.tdata",
+ SectionFlags::Writeable | SectionFlags::TLS);
+ TLSBSSSection = getNamedSection("\t.tbss",
+ SectionFlags::Writeable | SectionFlags::TLS | SectionFlags::BSS);
+
+ DataRelSection = getNamedSection("\t.data.rel", SectionFlags::Writeable);
+ DataRelLocalSection = getNamedSection("\t.data.rel.local",
+ SectionFlags::Writeable);
+ DataRelROSection = getNamedSection("\t.data.rel.ro",
+ SectionFlags::Writeable);
+ DataRelROLocalSection = getNamedSection("\t.data.rel.ro.local",
+ SectionFlags::Writeable);
+}
+
+SectionKind::Kind
+ELFTargetAsmInfo::SectionKindForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind Kind = TargetAsmInfo::SectionKindForGlobal(GV);
+
+ if (Kind != SectionKind::Data)
+ return Kind;
+
+ // Decide whether we need data.rel stuff.
+ const GlobalVariable* GVar = dyn_cast<GlobalVariable>(GV);
+ if (GVar->hasInitializer()) {
+ Constant *C = GVar->getInitializer();
+ bool isConstant = GVar->isConstant();
+ unsigned Reloc = RelocBehaviour();
+ if (Reloc != Reloc::None && C->ContainsRelocations(Reloc))
+ return (C->ContainsRelocations(Reloc::Global) ?
+ (isConstant ?
+ SectionKind::DataRelRO : SectionKind::DataRel) :
+ (isConstant ?
+ SectionKind::DataRelROLocal : SectionKind::DataRelLocal));
+ }
+
+ return Kind;
+}
+
+const Section*
+ELFTargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind Kind = SectionKindForGlobal(GV);
+
+ if (const Function *F = dyn_cast<Function>(GV)) {
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::PrivateLinkage:
+ case Function::InternalLinkage:
+ case Function::DLLExportLinkage:
+ case Function::ExternalLinkage:
+ return TextSection;
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ std::string Name = UniqueSectionForGlobal(GV, Kind);
+ unsigned Flags = SectionFlagsForGlobal(GV, Name.c_str());
+ return getNamedSection(Name.c_str(), Flags);
+ }
+ } else if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ if (GVar->isWeakForLinker()) {
+ std::string Name = UniqueSectionForGlobal(GVar, Kind);
+ unsigned Flags = SectionFlagsForGlobal(GVar, Name.c_str());
+ return getNamedSection(Name.c_str(), Flags);
+ } else {
+ switch (Kind) {
+ case SectionKind::Data:
+ case SectionKind::SmallData:
+ return DataSection;
+ case SectionKind::DataRel:
+ return DataRelSection;
+ case SectionKind::DataRelLocal:
+ return DataRelLocalSection;
+ case SectionKind::DataRelRO:
+ return DataRelROSection;
+ case SectionKind::DataRelROLocal:
+ return DataRelROLocalSection;
+ case SectionKind::BSS:
+ case SectionKind::SmallBSS:
+ // ELF targets usually have BSS sections
+ return getBSSSection_();
+ case SectionKind::ROData:
+ case SectionKind::SmallROData:
+ return getReadOnlySection();
+ case SectionKind::RODataMergeStr:
+ return MergeableStringSection(GVar);
+ case SectionKind::RODataMergeConst:
+ return MergeableConstSection(GVar);
+ case SectionKind::ThreadData:
+ // ELF targets usually support TLS stuff
+ return TLSDataSection;
+ case SectionKind::ThreadBSS:
+ return TLSBSSSection;
+ default:
+ assert(0 && "Unsuported section kind for global");
+ }
+ }
+ } else
+ assert(0 && "Unsupported global");
+
+ return NULL;
+}
+
+const Section*
+ELFTargetAsmInfo::SelectSectionForMachineConst(const Type *Ty) const {
+ // FIXME: Support data.rel stuff someday
+ return MergeableConstSection(Ty);
+}
+
+const Section*
+ELFTargetAsmInfo::MergeableConstSection(const GlobalVariable *GV) const {
+ Constant *C = GV->getInitializer();
+ return MergeableConstSection(C->getType());
+}
+
+inline const Section*
+ELFTargetAsmInfo::MergeableConstSection(const Type *Ty) const {
+ const TargetData *TD = TM.getTargetData();
+
+ // FIXME: building the section name as a string here is temporary, until this
+ // fully lands. We cannot use {Four,Eight,Sixteen}ByteConstantSection here,
+ // since those are currently used directly by the asmprinter.
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ if (Size == 4 || Size == 8 || Size == 16) {
+ std::string Name = ".rodata.cst" + utostr(Size);
+
+ return getNamedSection(Name.c_str(),
+ SectionFlags::setEntitySize(SectionFlags::Mergeable,
+ Size));
+ }
+
+ return getReadOnlySection();
+}
+
+const Section*
+ELFTargetAsmInfo::MergeableStringSection(const GlobalVariable *GV) const {
+ const TargetData *TD = TM.getTargetData();
+ Constant *C = cast<GlobalVariable>(GV)->getInitializer();
+ const Type *Ty = cast<ArrayType>(C->getType())->getElementType();
+
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ if (Size <= 16) {
+ assert(getCStringSection() && "Should have string section prefix");
+
+ // We also need alignment here
+ unsigned Align = TD->getPrefTypeAlignment(Ty);
+ if (Align < Size)
+ Align = Size;
+
+ std::string Name = getCStringSection() + utostr(Size) + '.' + utostr(Align);
+ unsigned Flags = SectionFlags::setEntitySize(SectionFlags::Mergeable |
+ SectionFlags::Strings,
+ Size);
+ return getNamedSection(Name.c_str(), Flags);
+ }
+
+ return getReadOnlySection();
+}
+
+std::string ELFTargetAsmInfo::printSectionFlags(unsigned flags) const {
+ std::string Flags = ",\"";
+
+ if (!(flags & SectionFlags::Debug))
+ Flags += 'a';
+ if (flags & SectionFlags::Code)
+ Flags += 'x';
+ if (flags & SectionFlags::Writeable)
+ Flags += 'w';
+ if (flags & SectionFlags::Mergeable)
+ Flags += 'M';
+ if (flags & SectionFlags::Strings)
+ Flags += 'S';
+ if (flags & SectionFlags::TLS)
+ Flags += 'T';
+ if (flags & SectionFlags::Small)
+ Flags += 's';
+
+ Flags += "\",";
+
+ // If the comment string is '@' (e.g. on ARM), use '%' instead.
+ if (strcmp(CommentString, "@") == 0)
+ Flags += '%';
+ else
+ Flags += '@';
+
+ // FIXME: There can be exceptions here
+ if (flags & SectionFlags::BSS)
+ Flags += "nobits";
+ else
+ Flags += "progbits";
+
+ if (unsigned entitySize = SectionFlags::getEntitySize(flags))
+ Flags += "," + utostr(entitySize);
+
+ return Flags;
+}
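+
+// Illustration (not part of the original source): for a mergeable 8-byte
+// constant section (SectionFlags::Mergeable with entity size 8) on a target
+// whose comment string is not '@', printSectionFlags returns
+//   ,"aM",@progbits,8
+// so the asm printer ends up emitting something like
+//   .section .rodata.cst8,"aM",@progbits,8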
diff --git a/lib/Target/IA64/AsmPrinter/CMakeLists.txt b/lib/Target/IA64/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..1d552bd5551c
--- /dev/null
+++ b/lib/Target/IA64/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,12 @@
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_partially_linked_object(LLVMIA64AsmPrinter
+ IA64AsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMIA64CodeGen n)
+
+add_dependencies(LLVMIA64AsmPrinter ${n})
diff --git a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
new file mode 100644
index 000000000000..fc54e23a44d7
--- /dev/null
+++ b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
@@ -0,0 +1,376 @@
+//===-- IA64AsmPrinter.cpp - Print out IA64 LLVM as assembly --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to assembly accepted by the GNU binutils 'gas'
+// assembler. The Intel 'ias' and HP-UX 'as' assemblers *may* choke on this
+// output, but if so that's a bug I'd like to hear about: please file a bug
+// report in bugzilla. FYI, the not too bad 'ias' assembler is bundled with
+// the Intel C/C++ compiler for Itanium Linux.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "IA64.h"
+#include "IA64TargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class IA64AsmPrinter : public AsmPrinter {
+ std::set<std::string> ExternalFunctionNames, ExternalObjectNames;
+ public:
+ explicit IA64AsmPrinter(raw_ostream &O, TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V) {}
+
+ virtual const char *getPassName() const {
+ return "IA64 Assembly Printer";
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ // This method is used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo){
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.getType() == MachineOperand::MO_Register) {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Not physref??");
+ //XXX Bug Workaround: See note in Printer::doInitialization about %.
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ } else {
+ printOp(MO);
+ }
+ }
+
+ void printS8ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ int val=(unsigned int)MI->getOperand(OpNo).getImm();
+      if(val>=128) val=val-256; // sign-extend from 8 bits
+ O << val;
+ }
+ void printS14ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ int val=(unsigned int)MI->getOperand(OpNo).getImm();
+      if(val>=8192) val=val-16384; // sign-extend from 14 bits
+ O << val;
+ }
+ void printS22ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ int val=(unsigned int)MI->getOperand(OpNo).getImm();
+      if(val>=2097152) val=val-4194304; // sign-extend from 22 bits
+ O << val;
+ }
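+    // Worked example for the sign extension above (illustration only): a raw
+    // S8 immediate of 0xff (255) prints as -1, 0x80 (128) as -128, and 0x7f
+    // (127) stays 127; the S14/S22 variants do the same at 14 and 22 bits.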
+ void printU64ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ O << (uint64_t)MI->getOperand(OpNo).getImm();
+ }
+ void printS64ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+// XXX : nasty hack to avoid GPREL22 "relocation truncated to fit" linker
+// errors - instead of add rX = @gprel(CPI<whatever>), r1;; we now
+// emit  movl rX = @gprel(CPI<whatever>);;
+// add rX = rX, r1;
+// this gives us 64 bits instead of 22 (for the add long imm) to play
+// with, which shuts up the linker. The problem is that the constant
+// pool entries aren't immediates at this stage, so we check here.
+// If it's an immediate, print it the old fashioned way. If it's
+// not, we print it as a constant pool index.
+ if (MI->getOperand(OpNo).isImm()) {
+ O << (int64_t)MI->getOperand(OpNo).getImm();
+ } else { // this is a constant pool reference: FIXME: assert this
+ printOp(MI->getOperand(OpNo));
+ }
+ }
+
+ void printGlobalOperand(const MachineInstr *MI, unsigned OpNo) {
+ printOp(MI->getOperand(OpNo), false); // this is NOT a br.call instruction
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo) {
+ printOp(MI->getOperand(OpNo), true); // this is a br.call instruction
+ }
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO, bool isBRCALLinsn= false);
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ };
+} // end of anonymous namespace
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "IA64GenAsmWriter.inc"
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool IA64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ const Function *F = MF.getFunction();
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ // Print out labels for the function.
+ EmitAlignment(5);
+ O << "\t.global\t" << CurrentFnName << '\n';
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ O << "\t.type\t" << CurrentFnName << ", @function\n";
+ O << CurrentFnName << ":\n";
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block if there are any predecessors.
+ if (!I->pred_empty()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ // We didn't modify anything.
+ return false;
+}
+
+void IA64AsmPrinter::printOp(const MachineOperand &MO,
+ bool isBRCALLinsn /* = false */) {
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << RI.get(MO.getReg()).AsmName;
+ return;
+
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_ConstantPoolIndex: {
+ O << "@gprel(" << TAI->getPrivateGlobalPrefix()
+ << "CPI" << getFunctionNumber() << "_" << MO.getIndex() << ")";
+ return;
+ }
+
+ case MachineOperand::MO_GlobalAddress: {
+
+ // functions need @ltoff(@fptr(fn_name)) form
+ GlobalValue *GV = MO.getGlobal();
+ Function *F = dyn_cast<Function>(GV);
+
+ bool Needfptr=false; // if we're computing an address @ltoff(X), do
+ // we need to decorate it so it becomes
+ // @ltoff(@fptr(X)) ?
+ if (F && !isBRCALLinsn /*&& F->isDeclaration()*/)
+ Needfptr=true;
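+
+    // For illustration, the forms this case emits (a sketch; 'foo' and 'bar'
+    // are hypothetical symbols):
+    //   br.call target (isBRCALLinsn):   foo
+    //   address of a function:           @ltoff(@fptr(foo#))
+    //   address of a data global:        @ltoff(bar#)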
+
+ // if this is the target of a call instruction, we should define
+ // the function somewhere (GNU gas has no problem without this, but
+ // Intel ias rightly complains of an 'undefined symbol')
+
+ if (F /*&& isBRCALLinsn*/ && F->isDeclaration())
+ ExternalFunctionNames.insert(Mang->getValueName(MO.getGlobal()));
+ else
+ if (GV->isDeclaration()) // e.g. stuff like 'stdin'
+ ExternalObjectNames.insert(Mang->getValueName(MO.getGlobal()));
+
+ if (!isBRCALLinsn)
+ O << "@ltoff(";
+ if (Needfptr)
+ O << "@fptr(";
+ O << Mang->getValueName(MO.getGlobal());
+
+ if (Needfptr && !isBRCALLinsn)
+ O << "#))"; // close both fptr( and ltoff(
+ else {
+ if (Needfptr)
+ O << "#)"; // close only fptr(
+ if (!isBRCALLinsn)
+ O << "#)"; // close only ltoff(
+ }
+
+ int Offset = MO.getOffset();
+ if (Offset > 0)
+ O << " + " << Offset;
+ else if (Offset < 0)
+ O << " - " << -Offset;
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ ExternalFunctionNames.insert(MO.getSymbolName());
+ return;
+ default:
+ O << "<AsmPrinter: unknown operand type: " << MO.getType() << " >"; return;
+ }
+}
+
+/// printMachineInstruction -- Print out a single IA64 LLVM instruction
+/// MI to the current output stream.
+///
+void IA64AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+}
+
+bool IA64AsmPrinter::doInitialization(Module &M) {
+ bool Result = AsmPrinter::doInitialization(M);
+
+ O << "\n.ident \"LLVM-ia64\"\n\n"
+ << "\t.psr lsb\n" // should be "msb" on HP-UX, for starters
+ << "\t.radix C\n"
+ << "\t.psr abi64\n"; // we only support 64 bits for now
+ return Result;
+}
+
+void IA64AsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+    return;  // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ O << "\n\n";
+ std::string name = Mang->getValueName(GVar);
+ Constant *C = GVar->getInitializer();
+ unsigned Size = TD->getTypeAllocSize(C->getType());
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && !GVar->hasSection()) {
+ if (!GVar->isThreadLocal() &&
+ (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasLocalLinkage()) {
+ O << "\t.lcomm " << name << "#," << Size
+ << ',' << (1 << Align);
+ O << '\n';
+ } else {
+ O << "\t.common " << name << "#," << Size
+ << ',' << (1 << Align);
+ O << '\n';
+ }
+
+ return;
+ }
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ // Nonnull linkonce -> weak
+ O << "\t.weak " << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+    // FIXME: appending linkage variables should go into a section named
+    // after them or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << TAI->getGlobalDirective() << name << '\n';
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ case GlobalValue::GhostLinkage:
+ cerr << "GhostLinkage cannot appear in IA64AsmPrinter!\n";
+ abort();
+ case GlobalValue::DLLImportLinkage:
+ cerr << "DLLImport linkage is not supported by this target!\n";
+ abort();
+ case GlobalValue::DLLExportLinkage:
+ cerr << "DLLExport linkage is not supported by this target!\n";
+ abort();
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, GVar);
+
+ if (TAI->hasDotTypeDotSizeDirective()) {
+ O << "\t.type " << name << ",@object\n";
+ O << "\t.size " << name << ',' << Size << '\n';
+ }
+
+ O << name << ":\n";
+ EmitGlobalConstant(C);
+}
+
+
+bool IA64AsmPrinter::doFinalization(Module &M) {
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+ // we print out ".global X \n .type X, @function" for each external function
+ O << "\n\n// br.call targets referenced (and not defined) above: \n";
+ for (std::set<std::string>::iterator i = ExternalFunctionNames.begin(),
+ e = ExternalFunctionNames.end(); i!=e; ++i) {
+ O << "\t.global " << *i << "\n\t.type " << *i << ", @function\n";
+ }
+ O << "\n\n";
+
+ // we print out ".global X \n .type X, @object" for each external object
+ O << "\n\n// (external) symbols referenced (and not defined) above: \n";
+ for (std::set<std::string>::iterator i = ExternalObjectNames.begin(),
+ e = ExternalObjectNames.end(); i!=e; ++i) {
+ O << "\t.global " << *i << "\n\t.type " << *i << ", @object\n";
+ }
+ O << "\n\n";
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// createIA64CodePrinterPass - Returns a pass that prints the IA64
+/// assembly code for a MachineFunction to the given output stream, using
+/// the given target machine description.
+///
+FunctionPass *llvm::createIA64CodePrinterPass(raw_ostream &o,
+ IA64TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
diff --git a/lib/Target/IA64/AsmPrinter/Makefile b/lib/Target/IA64/AsmPrinter/Makefile
new file mode 100644
index 000000000000..12880f36f76d
--- /dev/null
+++ b/lib/Target/IA64/AsmPrinter/Makefile
@@ -0,0 +1,17 @@
+##===- lib/Target/IA64/AsmPrinter/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMIA64AsmPrinter
+
+# Hack: we need to include 'main' IA64 target directory to grab
+# private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/IA64/CMakeLists.txt b/lib/Target/IA64/CMakeLists.txt
new file mode 100644
index 000000000000..26f86ca197fc
--- /dev/null
+++ b/lib/Target/IA64/CMakeLists.txt
@@ -0,0 +1,20 @@
+set(LLVM_TARGET_DEFINITIONS IA64.td)
+
+tablegen(IA64GenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(IA64GenRegisterNames.inc -gen-register-enums)
+tablegen(IA64GenRegisterInfo.inc -gen-register-desc)
+tablegen(IA64GenInstrNames.inc -gen-instr-enums)
+tablegen(IA64GenInstrInfo.inc -gen-instr-desc)
+tablegen(IA64GenAsmWriter.inc -gen-asm-writer)
+tablegen(IA64GenDAGISel.inc -gen-dag-isel)
+
+add_llvm_target(IA64CodeGen
+ IA64Bundling.cpp
+ IA64InstrInfo.cpp
+ IA64ISelDAGToDAG.cpp
+ IA64ISelLowering.cpp
+ IA64RegisterInfo.cpp
+ IA64Subtarget.cpp
+ IA64TargetAsmInfo.cpp
+ IA64TargetMachine.cpp
+ )
diff --git a/lib/Target/IA64/IA64.h b/lib/Target/IA64/IA64.h
new file mode 100644
index 000000000000..ec8e3d6d74da
--- /dev/null
+++ b/lib/Target/IA64/IA64.h
@@ -0,0 +1,58 @@
+//===-- IA64.h - Top-level interface for IA64 representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the IA64
+// target library, as used by the LLVM JIT.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_IA64_H
+#define TARGET_IA64_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class IA64TargetMachine;
+class FunctionPass;
+class raw_ostream;
+
+/// createIA64DAGToDAGInstructionSelector - This pass converts an LLVM
+/// function into IA64 machine code in a sane, DAG->DAG transform.
+///
+FunctionPass *createIA64DAGToDAGInstructionSelector(IA64TargetMachine &TM);
+
+/// createIA64BundlingPass - This pass adds stop bits and bundles
+/// instructions.
+///
+FunctionPass *createIA64BundlingPass(IA64TargetMachine &TM);
+
+/// createIA64CodePrinterPass - Returns a pass that prints the IA64
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *createIA64CodePrinterPass(raw_ostream &o,
+ IA64TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+
+} // End llvm namespace
+
+// Defines symbolic names for IA64 registers. This defines a mapping from
+// register name to register number.
+//
+#include "IA64GenRegisterNames.inc"
+
+// Defines symbolic names for the IA64 instructions.
+//
+#include "IA64GenInstrNames.inc"
+
+#endif
+
+
diff --git a/lib/Target/IA64/IA64.td b/lib/Target/IA64/IA64.td
new file mode 100644
index 000000000000..c469281ab16e
--- /dev/null
+++ b/lib/Target/IA64/IA64.td
@@ -0,0 +1,39 @@
+//===-- IA64.td - Target definition file for Intel IA64 -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a target description file for the Intel IA64 architecture,
+// also known variously as ia64, IA-64, IPF, "the Itanium architecture" etc.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "IA64RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "IA64InstrInfo.td"
+
+def IA64InstrInfo : InstrInfo { }
+
+def IA64 : Target {
+ // Our instruction set
+ let InstructionSet = IA64InstrInfo;
+
+}
+
+
diff --git a/lib/Target/IA64/IA64Bundling.cpp b/lib/Target/IA64/IA64Bundling.cpp
new file mode 100644
index 000000000000..3a9ba6ca3f61
--- /dev/null
+++ b/lib/Target/IA64/IA64Bundling.cpp
@@ -0,0 +1,118 @@
+//===-- IA64Bundling.cpp - IA-64 instruction bundling pass. ------------ --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Add stops where required to prevent read-after-write and write-after-write
+// dependencies, for both registers and memory addresses. There are exceptions:
+//
+// - Compare instructions (cmp*, tbit, tnat, fcmp, frcpa) are OK with
+// WAW dependencies so long as they all target p0, or are of parallel
+// type (.and*/.or*)
+//
+// FIXME: bundling, for now, is left to the assembler.
+// FIXME: this might be an appropriate place to translate between different
+// instructions that do the same thing, if this helps bundling.
+//
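+// Illustration of what the pass does (a sketch, not output captured from this
+// pass): given the fallthrough sequence
+//
+//     add  r14 = r32, r33
+//     ld8  r15 = [r14]        // reads r14 while its write is still pending
+//
+// a stop is inserted so the dependent read starts a new instruction group:
+//
+//     add  r14 = r32, r33 ;;
+//     ld8  r15 = [r14]
+//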
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ia64-codegen"
+#include "IA64.h"
+#include "IA64InstrInfo.h"
+#include "IA64TargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(StopBitsAdded, "Number of stop bits added");
+
+namespace {
+ struct IA64BundlingPass : public MachineFunctionPass {
+ static char ID;
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ IA64TargetMachine &TM;
+
+ IA64BundlingPass(IA64TargetMachine &tm)
+ : MachineFunctionPass(&ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "IA64 (Itanium) Bundling Pass";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ // XXX: ugly global, but pending writes can cross basic blocks. Note that
+ // taken branches end instruction groups. So we only need to worry about
+ // 'fallthrough' code
+ std::set<unsigned> PendingRegWrites;
+ };
+ char IA64BundlingPass::ID = 0;
+} // end of anonymous namespace
+
+/// createIA64BundlingPass - Returns a pass that adds STOP (;;) instructions
+/// and arranges the result into bundles.
+///
+FunctionPass *llvm::createIA64BundlingPass(IA64TargetMachine &tm) {
+ return new IA64BundlingPass(tm);
+}
+
+/// runOnMachineBasicBlock - add stops and bundle this MBB.
+///
+bool IA64BundlingPass::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ MachineInstr *CurrentInsn = I++;
+ std::set<unsigned> CurrentReads, CurrentWrites, OrigWrites;
+
+ for(unsigned i=0; i < CurrentInsn->getNumOperands(); i++) {
+ MachineOperand &MO=CurrentInsn->getOperand(i);
+ if (MO.isReg()) {
+ if(MO.isUse()) { // TODO: exclude p0
+ CurrentReads.insert(MO.getReg());
+ }
+ if(MO.isDef()) { // TODO: exclude p0
+ CurrentWrites.insert(MO.getReg());
+ OrigWrites.insert(MO.getReg()); // FIXME: use a nondestructive
+ // set_intersect instead?
+ }
+ }
+ }
+
+ // CurrentReads/CurrentWrites contain info for the current instruction.
+ // Does it read or write any registers that are pending a write?
+ // (i.e. not separated by a stop)
+ set_intersect(CurrentReads, PendingRegWrites);
+ set_intersect(CurrentWrites, PendingRegWrites);
+
+ if(! (CurrentReads.empty() && CurrentWrites.empty()) ) {
+ // there is a conflict, insert a stop and reset PendingRegWrites
+ CurrentInsn = BuildMI(MBB, CurrentInsn, CurrentInsn->getDebugLoc(),
+ TM.getInstrInfo()->get(IA64::STOP), 0);
+ PendingRegWrites=OrigWrites; // carry over current writes to next insn
+ Changed=true; StopBitsAdded++; // update stats
+ } else { // otherwise, track additional pending writes
+ set_union(PendingRegWrites, OrigWrites);
+ }
+ } // onto the next insn in the MBB
+
+ return Changed;
+}
+
diff --git a/lib/Target/IA64/IA64ISelDAGToDAG.cpp b/lib/Target/IA64/IA64ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..9800c506ca9e
--- /dev/null
+++ b/lib/Target/IA64/IA64ISelDAGToDAG.cpp
@@ -0,0 +1,575 @@
+//===---- IA64ISelDAGToDAG.cpp - IA64 pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for IA64,
+// converting a legalized dag to an IA64 dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ia64-codegen"
+#include "IA64.h"
+#include "IA64TargetMachine.h"
+#include "IA64ISelLowering.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// IA64DAGToDAGISel - IA64 specific code to select IA64 machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class IA64DAGToDAGISel : public SelectionDAGISel {
+ unsigned GlobalBaseReg;
+ public:
+ explicit IA64DAGToDAGISel(IA64TargetMachine &TM)
+ : SelectionDAGISel(TM) {}
+
+ virtual bool runOnFunction(Function &Fn) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ return SelectionDAGISel::runOnFunction(Fn);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
+ /// base register. Return the virtual register that holds this value.
+ // SDValue getGlobalBaseReg(); TODO: hmm
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDValue N);
+
+ SDNode *SelectIntImmediateExpr(SDValue LHS, SDValue RHS,
+ unsigned OCHi, unsigned OCLo,
+ bool IsArithmetic = false,
+ bool Negate = false);
+ SDNode *SelectBitfieldInsert(SDNode *N);
+
+ /// SelectCC - Select a comparison of the specified values with the
+ /// specified condition code, returning the CR# of the expression.
+ SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC);
+
+ /// SelectAddr - Given the specified address, return the two operands for a
+ /// load/store instruction, and return true if it should be an indexed [r+r]
+ /// operation.
+ bool SelectAddr(SDValue Addr, SDValue &Op1, SDValue &Op2);
+
+ /// InstructionSelect - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "IA64 (Itanium) DAG->DAG Instruction Selector";
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "IA64GenDAGISel.inc"
+
+private:
+ SDNode *SelectDIV(SDValue Op);
+ };
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void IA64DAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+SDNode *IA64DAGToDAGISel::SelectDIV(SDValue Op) {
+ SDNode *N = Op.getNode();
+ SDValue Chain = N->getOperand(0);
+ SDValue Tmp1 = N->getOperand(0);
+ SDValue Tmp2 = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ bool isFP=false;
+
+ if(Tmp1.getValueType().isFloatingPoint())
+ isFP=true;
+
+ bool isModulus=false; // is it a division or a modulus?
+ bool isSigned=false;
+
+ switch(N->getOpcode()) {
+ case ISD::FDIV:
+ case ISD::SDIV: isModulus=false; isSigned=true; break;
+ case ISD::UDIV: isModulus=false; isSigned=false; break;
+ case ISD::FREM:
+ case ISD::SREM: isModulus=true; isSigned=true; break;
+ case ISD::UREM: isModulus=true; isSigned=false; break;
+ }
+
+ // TODO: check for integer divides by powers of 2 (or other simple patterns?)
+
+ SDValue TmpPR, TmpPR2;
+ SDValue TmpF1, TmpF2, TmpF3, TmpF4, TmpF5, TmpF6, TmpF7, TmpF8;
+ SDValue TmpF9, TmpF10,TmpF11,TmpF12,TmpF13,TmpF14,TmpF15;
+ SDNode *Result;
+
+ // we'll need copies of F0 and F1
+ SDValue F0 = CurDAG->getRegister(IA64::F0, MVT::f64);
+ SDValue F1 = CurDAG->getRegister(IA64::F1, MVT::f64);
+
+ // OK, emit some code:
+
+ if(!isFP) {
+ // first, load the inputs into FP regs.
+ TmpF1 =
+ SDValue(CurDAG->getTargetNode(IA64::SETFSIG, dl, MVT::f64, Tmp1), 0);
+ Chain = TmpF1.getValue(1);
+ TmpF2 =
+ SDValue(CurDAG->getTargetNode(IA64::SETFSIG, dl, MVT::f64, Tmp2), 0);
+ Chain = TmpF2.getValue(1);
+
+ // next, convert the inputs to FP
+ if(isSigned) {
+ TmpF3 =
+ SDValue(CurDAG->getTargetNode(IA64::FCVTXF, dl, MVT::f64, TmpF1), 0);
+ Chain = TmpF3.getValue(1);
+ TmpF4 =
+ SDValue(CurDAG->getTargetNode(IA64::FCVTXF, dl, MVT::f64, TmpF2), 0);
+ Chain = TmpF4.getValue(1);
+ } else { // is unsigned
+ TmpF3 =
+ SDValue(CurDAG->getTargetNode(IA64::FCVTXUFS1, dl, MVT::f64, TmpF1),
+ 0);
+ Chain = TmpF3.getValue(1);
+ TmpF4 =
+ SDValue(CurDAG->getTargetNode(IA64::FCVTXUFS1, dl, MVT::f64, TmpF2),
+ 0);
+ Chain = TmpF4.getValue(1);
+ }
+
+ } else { // this is an FP divide/remainder, so we 'leak' some temp
+ // regs and assign TmpF3=Tmp1, TmpF4=Tmp2
+ TmpF3=Tmp1;
+ TmpF4=Tmp2;
+ }
+
+ // we start by computing an approximate reciprocal (good to 9 bits?)
+ // note, this instruction writes _both_ TmpF5 (answer) and TmpPR (predicate)
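+  //
+  // The fma chain that follows is a Newton-Raphson refinement of that
+  // reciprocal.  As a sketch (matching the CFNMAS1/CFMAS1 nodes built below,
+  // with a = dividend, b = divisor):
+  //     y0 ~= 1/b                (frcpa)
+  //     e0  = 1 - b*y0           (fnma)
+  //     y1  = y0 + e0*y0         (fma)
+  //     e1  = e0*e0              (fma with f0)
+  //     y2  = y1 + e1*y1         (fma)
+  // The FP path then refines once more and forms q = a*y and r = a - b*q
+  // before the final two-address fma; the integer path truncates instead.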
+ if(isFP)
+ TmpF5 = SDValue(CurDAG->getTargetNode(IA64::FRCPAS0, dl, MVT::f64,
+ MVT::i1, TmpF3, TmpF4), 0);
+ else
+ TmpF5 = SDValue(CurDAG->getTargetNode(IA64::FRCPAS1, dl, MVT::f64,
+ MVT::i1, TmpF3, TmpF4), 0);
+
+ TmpPR = TmpF5.getValue(1);
+ Chain = TmpF5.getValue(2);
+
+ SDValue minusB;
+ if(isModulus) { // for remainders, it'll be handy to have
+ // copies of -input_b
+ minusB = SDValue(CurDAG->getTargetNode(IA64::SUB, dl, MVT::i64,
+ CurDAG->getRegister(IA64::r0, MVT::i64), Tmp2), 0);
+ Chain = minusB.getValue(1);
+ }
+
+ SDValue TmpE0, TmpY1, TmpE1, TmpY2;
+
+ SDValue OpsE0[] = { TmpF4, TmpF5, F1, TmpPR };
+ TmpE0 = SDValue(CurDAG->getTargetNode(IA64::CFNMAS1, dl, MVT::f64,
+ OpsE0, 4), 0);
+ Chain = TmpE0.getValue(1);
+ SDValue OpsY1[] = { TmpF5, TmpE0, TmpF5, TmpPR };
+ TmpY1 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
+ OpsY1, 4), 0);
+ Chain = TmpY1.getValue(1);
+ SDValue OpsE1[] = { TmpE0, TmpE0, F0, TmpPR };
+ TmpE1 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
+ OpsE1, 4), 0);
+ Chain = TmpE1.getValue(1);
+ SDValue OpsY2[] = { TmpY1, TmpE1, TmpY1, TmpPR };
+ TmpY2 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
+ OpsY2, 4), 0);
+ Chain = TmpY2.getValue(1);
+
+ if(isFP) { // if this is an FP divide, we finish up here and exit early
+ if(isModulus)
+ assert(0 && "Sorry, try another FORTRAN compiler.");
+
+ SDValue TmpE2, TmpY3, TmpQ0, TmpR0;
+
+ SDValue OpsE2[] = { TmpE1, TmpE1, F0, TmpPR };
+ TmpE2 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
+ OpsE2, 4), 0);
+ Chain = TmpE2.getValue(1);
+ SDValue OpsY3[] = { TmpY2, TmpE2, TmpY2, TmpPR };
+ TmpY3 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
+ OpsY3, 4), 0);
+ Chain = TmpY3.getValue(1);
+ SDValue OpsQ0[] = { Tmp1, TmpY3, F0, TmpPR };
+ TmpQ0 =
+ SDValue(CurDAG->getTargetNode(IA64::CFMADS1, dl, // double prec!
+ MVT::f64, OpsQ0, 4), 0);
+ Chain = TmpQ0.getValue(1);
+ SDValue OpsR0[] = { Tmp2, TmpQ0, Tmp1, TmpPR };
+ TmpR0 =
+ SDValue(CurDAG->getTargetNode(IA64::CFNMADS1, dl, // double prec!
+ MVT::f64, OpsR0, 4), 0);
+ Chain = TmpR0.getValue(1);
+
+// we want Result to have the same target register as the frcpa, so
+// we two-address hack it. See the comment "for this to work..." on
+// page 48 of Intel application note #245415
+ SDValue Ops[] = { TmpF5, TmpY3, TmpR0, TmpQ0, TmpPR };
+ Result = CurDAG->getTargetNode(IA64::TCFMADS0, dl, // d.p. s0 rndg!
+ MVT::f64, Ops, 5);
+ Chain = SDValue(Result, 1);
+ return Result; // XXX: early exit!
+ } else { // this is *not* an FP divide, so there's a bit left to do:
+
+ SDValue TmpQ2, TmpR2, TmpQ3, TmpQ;
+
+ SDValue OpsQ2[] = { TmpF3, TmpY2, F0, TmpPR };
+ TmpQ2 = SDValue(CurDAG->getTargetNode(IA64::CFMAS1, dl, MVT::f64,
+ OpsQ2, 4), 0);
+ Chain = TmpQ2.getValue(1);
+ SDValue OpsR2[] = { TmpF4, TmpQ2, TmpF3, TmpPR };
+ TmpR2 = SDValue(CurDAG->getTargetNode(IA64::CFNMAS1, dl, MVT::f64,
+ OpsR2, 4), 0);
+ Chain = TmpR2.getValue(1);
+
+// we want TmpQ3 to have the same target register as the frcpa? maybe we
+// should two-address hack it. See the comment "for this to work..." on page
+// 48 of Intel application note #245415
+ SDValue OpsQ3[] = { TmpF5, TmpR2, TmpY2, TmpQ2, TmpPR };
+ TmpQ3 = SDValue(CurDAG->getTargetNode(IA64::TCFMAS1, dl, MVT::f64,
+ OpsQ3, 5), 0);
+ Chain = TmpQ3.getValue(1);
+
+ // STORY: without these two-address instructions (TCFMAS1 and TCFMADS0)
+ // the FPSWA won't be able to help out in the case of large/tiny
+ // arguments. Other fun bugs may also appear, e.g. 0/x = x, not 0.
+
+ if(isSigned)
+ TmpQ = SDValue(CurDAG->getTargetNode(IA64::FCVTFXTRUNCS1, dl,
+ MVT::f64, TmpQ3), 0);
+ else
+ TmpQ = SDValue(CurDAG->getTargetNode(IA64::FCVTFXUTRUNCS1, dl,
+ MVT::f64, TmpQ3), 0);
+
+ Chain = TmpQ.getValue(1);
+
+ if(isModulus) {
+ SDValue FPminusB =
+ SDValue(CurDAG->getTargetNode(IA64::SETFSIG, dl, MVT::f64, minusB),
+ 0);
+ Chain = FPminusB.getValue(1);
+ SDValue Remainder =
+ SDValue(CurDAG->getTargetNode(IA64::XMAL, dl, MVT::f64,
+ TmpQ, FPminusB, TmpF1), 0);
+ Chain = Remainder.getValue(1);
+ Result = CurDAG->getTargetNode(IA64::GETFSIG, dl, MVT::i64, Remainder);
+ Chain = SDValue(Result, 1);
+ } else { // just an integer divide
+ Result = CurDAG->getTargetNode(IA64::GETFSIG, dl, MVT::i64, TmpQ);
+ Chain = SDValue(Result, 1);
+ }
+
+ return Result;
+ } // wasn't an FP divide
+}
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *IA64DAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+ DebugLoc dl = Op.getDebugLoc();
+
+ switch (N->getOpcode()) {
+ default: break;
+
+ case IA64ISD::BRCALL: { // XXX: this is also a hack!
+ SDValue Chain = N->getOperand(0);
+ SDValue InFlag; // Null incoming flag value.
+
+ if(N->getNumOperands()==3) { // we have an incoming chain, callee and flag
+ InFlag = N->getOperand(2);
+ }
+
+ unsigned CallOpcode;
+ SDValue CallOperand;
+
+ // if we can call directly, do so
+ if (GlobalAddressSDNode *GASD =
+ dyn_cast<GlobalAddressSDNode>(N->getOperand(1))) {
+ CallOpcode = IA64::BRCALL_IPREL_GA;
+ CallOperand = CurDAG->getTargetGlobalAddress(GASD->getGlobal(), MVT::i64);
+ } else if (isa<ExternalSymbolSDNode>(N->getOperand(1))) {
+ // FIXME: we currently NEED this case for correctness, to avoid
+ // "non-pic code with imm reloc.n against dynamic symbol" errors
+ CallOpcode = IA64::BRCALL_IPREL_ES;
+ CallOperand = N->getOperand(1);
+ } else {
+ // otherwise we need to load the function descriptor,
+ // load the branch target (function)'s entry point and GP,
+ // branch (call) then restore the GP
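+      //
+      // Roughly the following pseudo-assembly (illustration only; register
+      // names are arbitrary and 'fd' is the function-descriptor pointer):
+      //     ld8   rE = [fd]          // entry point at offset 0
+      //     adds  rA = 8, fd
+      //     ld8   rG = [rA]          // callee's GP at offset 8
+      //     mov   r1 = rG
+      //     mov   b6 = rE
+      //     br.call b0 = b6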
+ SDValue FnDescriptor = N->getOperand(1);
+
+ // load the branch target's entry point [mem] and
+ // GP value [mem+8]
+ SDValue targetEntryPoint=
+ SDValue(CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, MVT::Other,
+ FnDescriptor, CurDAG->getEntryNode()), 0);
+ Chain = targetEntryPoint.getValue(1);
+ SDValue targetGPAddr=
+ SDValue(CurDAG->getTargetNode(IA64::ADDS, dl, MVT::i64,
+ FnDescriptor,
+ CurDAG->getConstant(8, MVT::i64)), 0);
+ Chain = targetGPAddr.getValue(1);
+ SDValue targetGP =
+ SDValue(CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64,MVT::Other,
+ targetGPAddr, CurDAG->getEntryNode()), 0);
+ Chain = targetGP.getValue(1);
+
+ Chain = CurDAG->getCopyToReg(Chain, dl, IA64::r1, targetGP, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = CurDAG->getCopyToReg(Chain, dl, IA64::B6,
+ targetEntryPoint, InFlag); // FLAG these?
+ InFlag = Chain.getValue(1);
+
+ CallOperand = CurDAG->getRegister(IA64::B6, MVT::i64);
+ CallOpcode = IA64::BRCALL_INDIRECT;
+ }
+
+ // Finally, once everything is setup, emit the call itself
+ if (InFlag.getNode())
+ Chain = SDValue(CurDAG->getTargetNode(CallOpcode, dl, MVT::Other,
+ MVT::Flag, CallOperand, InFlag), 0);
+ else // there might be no arguments
+ Chain = SDValue(CurDAG->getTargetNode(CallOpcode, dl, MVT::Other,
+ MVT::Flag, CallOperand, Chain), 0);
+ InFlag = Chain.getValue(1);
+
+ std::vector<SDValue> CallResults;
+
+ CallResults.push_back(Chain);
+ CallResults.push_back(InFlag);
+
+ for (unsigned i = 0, e = CallResults.size(); i != e; ++i)
+ ReplaceUses(Op.getValue(i), CallResults[i]);
+ return NULL;
+ }
+
+ case IA64ISD::GETFD: {
+ SDValue Input = N->getOperand(0);
+ return CurDAG->getTargetNode(IA64::GETFD, dl, MVT::i64, Input);
+ }
+
+ case ISD::FDIV:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ return SelectDIV(Op);
+
+ case ISD::TargetConstantFP: {
+ SDValue Chain = CurDAG->getEntryNode(); // this is a constant, so..
+
+ SDValue V;
+ ConstantFPSDNode* N2 = cast<ConstantFPSDNode>(N);
+ if (N2->getValueAPF().isPosZero()) {
+ V = CurDAG->getCopyFromReg(Chain, dl, IA64::F0, MVT::f64);
+ } else if (N2->isExactlyValue(N2->getValueType(0) == MVT::f32 ?
+ APFloat(+1.0f) : APFloat(+1.0))) {
+ V = CurDAG->getCopyFromReg(Chain, dl, IA64::F1, MVT::f64);
+ } else
+ assert(0 && "Unexpected FP constant!");
+
+ ReplaceUses(SDValue(N, 0), V);
+ return 0;
+ }
+
+  case ISD::FrameIndex: { // TODO: reduce creepiness
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, IA64::MOV, MVT::i64,
+ CurDAG->getTargetFrameIndex(FI, MVT::i64));
+ else
+ return CurDAG->getTargetNode(IA64::MOV, dl, MVT::i64,
+ CurDAG->getTargetFrameIndex(FI, MVT::i64));
+ }
+
+ case ISD::ConstantPool: { // TODO: nuke the constant pool
+ // (ia64 doesn't need one)
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ Constant *C = CP->getConstVal();
+ SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64,
+ CP->getAlignment());
+ return CurDAG->getTargetNode(IA64::ADDL_GA, dl, MVT::i64, // ?
+ CurDAG->getRegister(IA64::r1, MVT::i64), CPI);
+ }
+
+ case ISD::GlobalAddress: {
+ GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
+ SDValue GA = CurDAG->getTargetGlobalAddress(GV, MVT::i64);
+ SDValue Tmp =
+ SDValue(CurDAG->getTargetNode(IA64::ADDL_GA, dl, MVT::i64,
+ CurDAG->getRegister(IA64::r1,
+ MVT::i64), GA), 0);
+ return CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, MVT::Other, Tmp,
+ CurDAG->getEntryNode());
+ }
+
+/* XXX
+ case ISD::ExternalSymbol: {
+ SDValue EA = CurDAG->getTargetExternalSymbol(
+ cast<ExternalSymbolSDNode>(N)->getSymbol(),
+ MVT::i64);
+ SDValue Tmp = CurDAG->getTargetNode(IA64::ADDL_EA, dl, MVT::i64,
+ CurDAG->getRegister(IA64::r1,
+ MVT::i64),
+ EA);
+ return CurDAG->getTargetNode(IA64::LD8, dl, MVT::i64, Tmp);
+ }
+*/
+
+ case ISD::LOAD: { // FIXME: load -1, not 1, for bools?
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Address = LD->getBasePtr();
+
+ MVT TypeBeingLoaded = LD->getMemoryVT();
+ unsigned Opc;
+ switch (TypeBeingLoaded.getSimpleVT()) {
+ default:
+#ifndef NDEBUG
+ N->dump(CurDAG);
+#endif
+ assert(0 && "Cannot load this type!");
+ case MVT::i1: { // this is a bool
+ Opc = IA64::LD1; // first we load a byte, then compare for != 0
+ if(N->getValueType(0) == MVT::i1) { // XXX: early exit!
+ return CurDAG->SelectNodeTo(N, IA64::CMPNE, MVT::i1, MVT::Other,
+ SDValue(CurDAG->getTargetNode(Opc, dl,
+ MVT::i64,
+ Address), 0),
+ CurDAG->getRegister(IA64::r0, MVT::i64),
+ Chain);
+ }
+ /* otherwise, we want to load a bool into something bigger: LD1
+ will do that for us, so we just fall through */
+ }
+ case MVT::i8: Opc = IA64::LD1; break;
+ case MVT::i16: Opc = IA64::LD2; break;
+ case MVT::i32: Opc = IA64::LD4; break;
+ case MVT::i64: Opc = IA64::LD8; break;
+
+ case MVT::f32: Opc = IA64::LDF4; break;
+ case MVT::f64: Opc = IA64::LDF8; break;
+ }
+
+ // TODO: comment this
+ return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), MVT::Other,
+ Address, Chain);
+ }
+
+ case ISD::STORE: {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Address = ST->getBasePtr();
+ SDValue Chain = ST->getChain();
+
+ unsigned Opc;
+ if (ISD::isNON_TRUNCStore(N)) {
+ switch (N->getOperand(1).getValueType().getSimpleVT()) {
+ default: assert(0 && "unknown type in store");
+ case MVT::i1: { // this is a bool
+ Opc = IA64::ST1; // we store either 0 or 1 as a byte
+ // first load zero!
+ SDValue Initial = CurDAG->getCopyFromReg(Chain, dl, IA64::r0, MVT::i64);
+ Chain = Initial.getValue(1);
+ // then load 1 into the same reg iff the predicate to store is 1
+ SDValue Tmp = ST->getValue();
+ Tmp =
+ SDValue(CurDAG->getTargetNode(IA64::TPCADDS, dl, MVT::i64, Initial,
+ CurDAG->getTargetConstant(1,
+ MVT::i64),
+ Tmp), 0);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::Other, Address, Tmp, Chain);
+ }
+ case MVT::i64: Opc = IA64::ST8; break;
+ case MVT::f64: Opc = IA64::STF8; break;
+ }
+ } else { // Truncating store
+ switch(ST->getMemoryVT().getSimpleVT()) {
+ default: assert(0 && "unknown type in truncstore");
+ case MVT::i8: Opc = IA64::ST1; break;
+ case MVT::i16: Opc = IA64::ST2; break;
+ case MVT::i32: Opc = IA64::ST4; break;
+ case MVT::f32: Opc = IA64::STF4; break;
+ }
+ }
+
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::Other, N2, N1, Chain);
+ }
+
+ case ISD::BRCOND: {
+ SDValue Chain = N->getOperand(0);
+ SDValue CC = N->getOperand(1);
+ MachineBasicBlock *Dest =
+ cast<BasicBlockSDNode>(N->getOperand(2))->getBasicBlock();
+      // FIXME - we do NOT need long branches all the time
+ return CurDAG->SelectNodeTo(N, IA64::BRLCOND_NOTCALL, MVT::Other, CC,
+ CurDAG->getBasicBlock(Dest), Chain);
+ }
+
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END: {
+ int64_t Amt = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned Opc = N->getOpcode() == ISD::CALLSEQ_START ?
+ IA64::ADJUSTCALLSTACKDOWN : IA64::ADJUSTCALLSTACKUP;
+ SDValue N0 = N->getOperand(0);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::Other, getI64Imm(Amt), N0);
+ }
+
+ case ISD::BR:
+ // FIXME: we don't need long branches all the time!
+ SDValue N0 = N->getOperand(0);
+ return CurDAG->SelectNodeTo(N, IA64::BRL_NOTCALL, MVT::Other,
+ N->getOperand(1), N0);
+ }
+
+ return SelectCode(Op);
+}
+
+
+/// createIA64DAGToDAGInstructionSelector - This pass converts a legalized DAG
+/// into an IA64-specific DAG, ready for instruction scheduling.
+///
+FunctionPass
+*llvm::createIA64DAGToDAGInstructionSelector(IA64TargetMachine &TM) {
+ return new IA64DAGToDAGISel(TM);
+}
+
diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp
new file mode 100644
index 000000000000..34a0686564c0
--- /dev/null
+++ b/lib/Target/IA64/IA64ISelLowering.cpp
@@ -0,0 +1,622 @@
+//===-- IA64ISelLowering.cpp - IA64 DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IA64ISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64ISelLowering.h"
+#include "IA64MachineFunctionInfo.h"
+#include "IA64TargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+IA64TargetLowering::IA64TargetLowering(TargetMachine &TM)
+ : TargetLowering(TM) {
+
+ // register class for general registers
+ addRegisterClass(MVT::i64, IA64::GRRegisterClass);
+
+ // register class for FP registers
+ addRegisterClass(MVT::f64, IA64::FPRegisterClass);
+
+ // register class for predicate registers
+ addRegisterClass(MVT::i1, IA64::PRRegisterClass);
+
+ setLoadExtAction(ISD::EXTLOAD , MVT::i1 , Promote);
+
+ setLoadExtAction(ISD::ZEXTLOAD , MVT::i1 , Promote);
+
+ setLoadExtAction(ISD::SEXTLOAD , MVT::i1 , Promote);
+ setLoadExtAction(ISD::SEXTLOAD , MVT::i8 , Expand);
+ setLoadExtAction(ISD::SEXTLOAD , MVT::i16 , Expand);
+ setLoadExtAction(ISD::SEXTLOAD , MVT::i32 , Expand);
+
+ setOperationAction(ISD::BRIND , MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT , MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC , MVT::Other, Expand);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
+
+ // ia64 uses SELECT not SELECT_CC
+ setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
+
+ // We need to handle ISD::RET for void functions ourselves,
+ // so we get a chance to restore ar.pfs before adding a
+ // br.ret insn
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+
+ setShiftAmountType(MVT::i64);
+
+ setOperationAction(ISD::FREM , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f64 , Expand);
+
+ setOperationAction(ISD::UREM , MVT::f32 , Expand);
+ setOperationAction(ISD::UREM , MVT::f64 , Expand);
+
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+
+ setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
+
+ // We don't support sin/cos/sqrt/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // FIXME: IA64 supports fcopysign natively!
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+  // IA64 has ctlz in the form of the 'fnorm' instruction.  The Legalizer's
+  // expansion of ctlz/cttz in terms of ctpop is much larger, but has lower
+  // latency.
+ // FIXME: Custom lower CTLZ when compiling for size?
+ setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::ROTL , MVT::i64 , Expand);
+ setOperationAction(ISD::ROTR , MVT::i64 , Expand);
+
+  // FIXME: IA64 has this, but it is not implemented; it should use mux @rev
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+
+ // Thread Local Storage
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+
+ setStackPointerRegisterToSaveRestore(IA64::r12);
+
+ setJumpBufSize(704); // on ia64-linux, jmp_bufs are 704 bytes..
+ setJumpBufAlignment(16); // ...and must be 16-byte aligned
+
+ computeRegisterProperties();
+
+ addLegalFPImmediate(APFloat(+0.0));
+ addLegalFPImmediate(APFloat(-0.0));
+ addLegalFPImmediate(APFloat(+1.0));
+ addLegalFPImmediate(APFloat(-1.0));
+}
+
+const char *IA64TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case IA64ISD::GETFD: return "IA64ISD::GETFD";
+ case IA64ISD::BRCALL: return "IA64ISD::BRCALL";
+ case IA64ISD::RET_FLAG: return "IA64ISD::RET_FLAG";
+ }
+}
+
+MVT IA64TargetLowering::getSetCCResultType(MVT VT) const {
+ return MVT::i1;
+}
+
+void IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &ArgValues,
+ DebugLoc dl) {
+ //
+ // add beautiful description of IA64 stack frame format
+ // here (from intel 24535803.pdf most likely)
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ GP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ SP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ RP = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+
+ MachineBasicBlock& BB = MF.front();
+
+ unsigned args_int[] = {IA64::r32, IA64::r33, IA64::r34, IA64::r35,
+ IA64::r36, IA64::r37, IA64::r38, IA64::r39};
+
+ unsigned args_FP[] = {IA64::F8, IA64::F9, IA64::F10, IA64::F11,
+ IA64::F12,IA64::F13,IA64::F14, IA64::F15};
+
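+  // How the two tables combine, for illustration (a hedged sketch; 'f' is a
+  // hypothetical signature): for f(i64, double, i64) the loop below assigns
+  // arg0 -> r32, arg1 -> F8 (the first unused FP arg register), and
+  // arg2 -> r34.  Integer args are indexed by argument position, FP args by
+  // how many FP args have been seen so far.
+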
+ unsigned argVreg[8];
+ unsigned argPreg[8];
+ unsigned argOpc[8];
+
+ unsigned used_FPArgs = 0; // how many FP args have been used so far?
+
+ unsigned ArgOffset = 0;
+ int count = 0;
+
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+ {
+ SDValue newroot, argt;
+ if(count < 8) { // need to fix this logic? maybe.
+
+ switch (getValueType(I->getType()).getSimpleVT()) {
+ default:
+ assert(0 && "ERROR in LowerArgs: can't lower this type of arg.\n");
+ case MVT::f32:
+ // fixme? (well, will need to for weird FP structy stuff,
+ // see intel ABI docs)
+ case MVT::f64:
+//XXX BuildMI(&BB, IA64::IDEF, 0, args_FP[used_FPArgs]);
+ MF.getRegInfo().addLiveIn(args_FP[used_FPArgs]);
+ // mark this reg as liveIn
+ // floating point args go into f8..f15 as-needed, the increment
+ argVreg[count] = // is below..:
+ MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::f64));
+ // FP args go into f8..f15 as needed: (hence the ++)
+ argPreg[count] = args_FP[used_FPArgs++];
+ argOpc[count] = IA64::FMOV;
+ argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), dl,
+ argVreg[count], MVT::f64);
+ if (I->getType() == Type::FloatTy)
+ argt = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, argt,
+ DAG.getIntPtrConstant(0));
+ break;
+ case MVT::i1: // NOTE: as far as C abi stuff goes,
+ // bools are just boring old ints
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+//XXX BuildMI(&BB, IA64::IDEF, 0, args_int[count]);
+ MF.getRegInfo().addLiveIn(args_int[count]);
+ // mark this register as liveIn
+ argVreg[count] =
+ MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ argPreg[count] = args_int[count];
+ argOpc[count] = IA64::MOV;
+ argt = newroot =
+ DAG.getCopyFromReg(DAG.getRoot(), dl, argVreg[count], MVT::i64);
+ if ( getValueType(I->getType()) != MVT::i64)
+ argt = DAG.getNode(ISD::TRUNCATE, dl, getValueType(I->getType()),
+ newroot);
+ break;
+ }
+ } else { // more than 8 args go into the frame
+ // Create the frame index object for this incoming parameter...
+ ArgOffset = 16 + 8 * (count - 8);
+ int FI = MFI->CreateFixedObject(8, ArgOffset);
+
+ // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
+ argt = newroot = DAG.getLoad(getValueType(I->getType()), dl,
+ DAG.getEntryNode(), FIN, NULL, 0);
+ }
+ ++count;
+ DAG.setRoot(newroot.getValue(1));
+ ArgValues.push_back(argt);
+ }
+
+
+ // Create a vreg to hold the output of (what will become)
+ // the "alloc" instruction
+ VirtGPR = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ BuildMI(&BB, dl, TII->get(IA64::PSEUDO_ALLOC), VirtGPR);
+ // we create a PSEUDO_ALLOC (pseudo)instruction for now
+/*
+ BuildMI(&BB, IA64::IDEF, 0, IA64::r1);
+
+ // hmm:
+ BuildMI(&BB, IA64::IDEF, 0, IA64::r12);
+ BuildMI(&BB, IA64::IDEF, 0, IA64::rp);
+ // ..hmm.
+
+ BuildMI(&BB, IA64::MOV, 1, GP).addReg(IA64::r1);
+
+ // hmm:
+ BuildMI(&BB, IA64::MOV, 1, SP).addReg(IA64::r12);
+ BuildMI(&BB, IA64::MOV, 1, RP).addReg(IA64::rp);
+ // ..hmm.
+*/
+
+ unsigned tempOffset=0;
+
+ // if this is a varargs function, we simply lower llvm.va_start by
+ // pointing to the first entry
+ if(F.isVarArg()) {
+ tempOffset=0;
+ VarArgsFrameIndex = MFI->CreateFixedObject(8, tempOffset);
+ }
+
+ // here we actually do the moving of args, and store them to the stack
+ // too if this is a varargs function:
+ for (int i = 0; i < count && i < 8; ++i) {
+ BuildMI(&BB, dl, TII->get(argOpc[i]), argVreg[i]).addReg(argPreg[i]);
+ if(F.isVarArg()) {
+ // if this is a varargs function, we copy the input registers to the stack
+ int FI = MFI->CreateFixedObject(8, tempOffset);
+ tempOffset+=8; //XXX: is it safe to use r22 like this?
+ BuildMI(&BB, dl, TII->get(IA64::MOV), IA64::r22).addFrameIndex(FI);
+ // FIXME: we should use st8.spill here, one day
+ BuildMI(&BB, dl, TII->get(IA64::ST8), IA64::r22).addReg(argPreg[i]);
+ }
+ }
+
+ // Finally, inform the code generator which regs we return values in.
+ // (see the ISD::RET: case in the instruction selector)
+ switch (getValueType(F.getReturnType()).getSimpleVT()) {
+ default: assert(0 && "i have no idea where to return this type!");
+ case MVT::isVoid: break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ MF.getRegInfo().addLiveOut(IA64::r8);
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ MF.getRegInfo().addLiveOut(IA64::F8);
+ break;
+ }
+}
+
+std::pair<SDValue, SDValue>
+IA64TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+ bool RetSExt, bool RetZExt, bool isVarArg,
+ bool isInreg, unsigned CallingConv,
+ bool isTailCall, SDValue Callee,
+ ArgListTy &Args, SelectionDAG &DAG,
+ DebugLoc dl) {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ unsigned NumBytes = 16;
+ unsigned outRegsUsed = 0;
+
+ if (Args.size() > 8) {
+ NumBytes += (Args.size() - 8) * 8;
+ outRegsUsed = 8;
+ } else {
+ outRegsUsed = Args.size();
+ }
+
+ // FIXME? this WILL fail if we ever try to pass around an arg that
+  // consumes more than a single output slot (a 'real' double, an int128,
+  // some sort of aggregate, etc.), as we'll underestimate how many 'outX'
+ // registers we use. Hopefully, the assembler will notice.
+ MF.getInfo<IA64FunctionInfo>()->outRegsUsed=
+ std::max(outRegsUsed, MF.getInfo<IA64FunctionInfo>()->outRegsUsed);
+
+ // keep stack frame 16-byte aligned
+ // assert(NumBytes==((NumBytes+15) & ~15) &&
+ // "stack frame not 16-byte aligned!");
+ NumBytes = (NumBytes+15) & ~15;
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue StackPtr;
+ std::vector<SDValue> Stores;
+ std::vector<SDValue> Converts;
+ std::vector<SDValue> RegValuesToPass;
+ unsigned ArgOffset = 16;
+
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
+ {
+ SDValue Val = Args[i].Node;
+ MVT ObjectVT = Val.getValueType();
+ SDValue ValToStore(0, 0), ValToConvert(0, 0);
+ unsigned ObjSize=8;
+ switch (ObjectVT.getSimpleVT()) {
+ default: assert(0 && "unexpected argument type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32: {
+ //promote to 64-bits, sign/zero extending based on type
+ //of the argument
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ if (Args[i].isSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].isZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+ Val = DAG.getNode(ExtendKind, dl, MVT::i64, Val);
+ // XXX: fall through
+ }
+ case MVT::i64:
+ //ObjSize = 8;
+ if(RegValuesToPass.size() >= 8) {
+ ValToStore = Val;
+ } else {
+ RegValuesToPass.push_back(Val);
+ }
+ break;
+ case MVT::f32:
+ //promote to 64-bits
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ // XXX: fall through
+ case MVT::f64:
+ if(RegValuesToPass.size() >= 8) {
+ ValToStore = Val;
+ } else {
+ RegValuesToPass.push_back(Val);
+        if(1 /* TODO: if(calling external or variadic function)*/ ) {
+ ValToConvert = Val; // additionally pass this FP value as an int
+ }
+ }
+ break;
+ }
+
+ if(ValToStore.getNode()) {
+ if(!StackPtr.getNode()) {
+ StackPtr = DAG.getRegister(IA64::r12, MVT::i64);
+ }
+ SDValue PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, PtrOff);
+ Stores.push_back(DAG.getStore(Chain, dl, ValToStore, PtrOff, NULL, 0));
+ ArgOffset += ObjSize;
+ }
+
+ if(ValToConvert.getNode()) {
+ Converts.push_back(DAG.getNode(IA64ISD::GETFD, dl,
+ MVT::i64, ValToConvert));
+ }
+ }
+
+ // Emit all stores, make sure they occur before any copies into physregs.
+ if (!Stores.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &Stores[0],Stores.size());
+
+ static const unsigned IntArgRegs[] = {
+ IA64::out0, IA64::out1, IA64::out2, IA64::out3,
+ IA64::out4, IA64::out5, IA64::out6, IA64::out7
+ };
+
+ static const unsigned FPArgRegs[] = {
+ IA64::F8, IA64::F9, IA64::F10, IA64::F11,
+ IA64::F12, IA64::F13, IA64::F14, IA64::F15
+ };
+
+ SDValue InFlag;
+
+ // save the current GP, SP and RP : FIXME: do we need to do all 3 always?
+ SDValue GPBeforeCall = DAG.getCopyFromReg(Chain, dl, IA64::r1,
+ MVT::i64, InFlag);
+ Chain = GPBeforeCall.getValue(1);
+ InFlag = Chain.getValue(2);
+ SDValue SPBeforeCall = DAG.getCopyFromReg(Chain, dl, IA64::r12,
+ MVT::i64, InFlag);
+ Chain = SPBeforeCall.getValue(1);
+ InFlag = Chain.getValue(2);
+ SDValue RPBeforeCall = DAG.getCopyFromReg(Chain, dl, IA64::rp,
+ MVT::i64, InFlag);
+ Chain = RPBeforeCall.getValue(1);
+ InFlag = Chain.getValue(2);
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing integer args into regs out[0-7]
+ // mapped 1:1 and the FP args into regs F8-F15 "lazily"
+ // TODO: for performance, we should only copy FP args into int regs when we
+  // know this is required (i.e. for variadic or external (unknown) functions)
+
+  // first do the FP->(integer representation) conversions; these are
+ // flagged for now, but shouldn't have to be (TODO)
+ unsigned seenConverts = 0;
+ for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
+ if(RegValuesToPass[i].getValueType().isFloatingPoint()) {
+ Chain = DAG.getCopyToReg(Chain, dl, IntArgRegs[i],
+ Converts[seenConverts++], InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ }
+
+ // next copy args into the usual places, these are flagged
+ unsigned usedFPArgs = 0;
+ for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl,
+ RegValuesToPass[i].getValueType().isInteger() ?
+ IntArgRegs[i] : FPArgRegs[usedFPArgs++], RegValuesToPass[i], InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+/*
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i64);
+ }
+*/
+
+ std::vector<MVT> NodeTys;
+ std::vector<SDValue> CallOperands;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+ CallOperands.push_back(Chain);
+ CallOperands.push_back(Callee);
+
+ // emit the call itself
+ if (InFlag.getNode())
+ CallOperands.push_back(InFlag);
+ else
+ assert(0 && "this should never happen!\n");
+
+ // to make way for a hack:
+ Chain = DAG.getNode(IA64ISD::BRCALL, dl, NodeTys,
+ &CallOperands[0], CallOperands.size());
+ InFlag = Chain.getValue(1);
+
+ // restore the GP, SP and RP after the call
+ Chain = DAG.getCopyToReg(Chain, dl, IA64::r1, GPBeforeCall, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, IA64::r12, SPBeforeCall, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, IA64::rp, RPBeforeCall, InFlag);
+ InFlag = Chain.getValue(1);
+
+ std::vector<MVT> RetVals;
+ RetVals.push_back(MVT::Other);
+ RetVals.push_back(MVT::Flag);
+
+ MVT RetTyVT = getValueType(RetTy);
+ SDValue RetVal;
+ if (RetTyVT != MVT::isVoid) {
+ switch (RetTyVT.getSimpleVT()) {
+ default: assert(0 && "Unknown value type to return!");
+ case MVT::i1: { // bools are just like other integers (returned in r8)
+ // we *could* fall through to the truncate below, but this saves a
+ // few redundant predicate ops
+ SDValue boolInR8 = DAG.getCopyFromReg(Chain, dl, IA64::r8,
+ MVT::i64,InFlag);
+ InFlag = boolInR8.getValue(2);
+ Chain = boolInR8.getValue(1);
+ SDValue zeroReg = DAG.getCopyFromReg(Chain, dl, IA64::r0,
+ MVT::i64, InFlag);
+ InFlag = zeroReg.getValue(2);
+ Chain = zeroReg.getValue(1);
+
+ RetVal = DAG.getSetCC(dl, MVT::i1, boolInR8, zeroReg, ISD::SETNE);
+ break;
+ }
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ RetVal = DAG.getCopyFromReg(Chain, dl, IA64::r8, MVT::i64, InFlag);
+ Chain = RetVal.getValue(1);
+
+ // keep track of whether it is sign or zero extended (todo: bools?)
+/* XXX
+ RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext :ISD::AssertZext,
+ dl, MVT::i64, RetVal, DAG.getValueType(RetTyVT));
+*/
+ RetVal = DAG.getNode(ISD::TRUNCATE, dl, RetTyVT, RetVal);
+ break;
+ case MVT::i64:
+ RetVal = DAG.getCopyFromReg(Chain, dl, IA64::r8, MVT::i64, InFlag);
+ Chain = RetVal.getValue(1);
+ InFlag = RetVal.getValue(2); // XXX dead
+ break;
+ case MVT::f32:
+ RetVal = DAG.getCopyFromReg(Chain, dl, IA64::F8, MVT::f64, InFlag);
+ Chain = RetVal.getValue(1);
+ RetVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, RetVal,
+ DAG.getIntPtrConstant(0));
+ break;
+ case MVT::f64:
+ RetVal = DAG.getCopyFromReg(Chain, dl, IA64::F8, MVT::f64, InFlag);
+ Chain = RetVal.getValue(1);
+ InFlag = RetVal.getValue(2); // XXX dead
+ break;
+ }
+ }
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+ return std::make_pair(RetVal, Chain);
+}
+
+SDValue IA64TargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ case ISD::GlobalTLSAddress:
+ assert(0 && "TLS not implemented for IA64.");
+ case ISD::RET: {
+ SDValue AR_PFSVal, Copy;
+
+ switch(Op.getNumOperands()) {
+ default:
+ assert(0 && "Do not know how to return this many arguments!");
+ abort();
+ case 1:
+ AR_PFSVal = DAG.getCopyFromReg(Op.getOperand(0), dl, VirtGPR, MVT::i64);
+ AR_PFSVal = DAG.getCopyToReg(AR_PFSVal.getValue(1), dl, IA64::AR_PFS,
+ AR_PFSVal);
+ return DAG.getNode(IA64ISD::RET_FLAG, dl, MVT::Other, AR_PFSVal);
+ case 3: {
+ // Copy the result into the output register & restore ar.pfs
+ MVT ArgVT = Op.getOperand(1).getValueType();
+ unsigned ArgReg = ArgVT.isInteger() ? IA64::r8 : IA64::F8;
+
+ AR_PFSVal = DAG.getCopyFromReg(Op.getOperand(0), dl, VirtGPR, MVT::i64);
+ Copy = DAG.getCopyToReg(AR_PFSVal.getValue(1), dl, ArgReg,
+ Op.getOperand(1), SDValue());
+ AR_PFSVal = DAG.getCopyToReg(Copy.getValue(0), dl,
+ IA64::AR_PFS, AR_PFSVal, Copy.getValue(1));
+ return DAG.getNode(IA64ISD::RET_FLAG, dl, MVT::Other,
+ AR_PFSVal, AR_PFSVal.getValue(1));
+ }
+ }
+ return SDValue();
+ }
+ case ISD::VAARG: {
+ MVT VT = getPointerTy();
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ SDValue VAList = DAG.getLoad(VT, dl, Op.getOperand(0), Op.getOperand(1),
+ SV, 0);
+ // Increment the pointer, VAList, to the next vaarg
+ SDValue VAIncr = DAG.getNode(ISD::ADD, dl, VT, VAList,
+ DAG.getConstant(VT.getSizeInBits()/8,
+ VT));
+ // Store the incremented VAList to the legalized pointer
+ VAIncr = DAG.getStore(VAList.getValue(1), dl, VAIncr,
+ Op.getOperand(1), SV, 0);
+ // Load the actual argument out of the pointer VAList
+ return DAG.getLoad(Op.getValueType(), dl, VAIncr, VAList, NULL, 0);
+ }
+ case ISD::VASTART: {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i64);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+ }
+ // Frame & Return address. Currently unimplemented
+ case ISD::RETURNADDR: break;
+ case ISD::FRAMEADDR: break;
+ }
+ return SDValue();
+}
diff --git a/lib/Target/IA64/IA64ISelLowering.h b/lib/Target/IA64/IA64ISelLowering.h
new file mode 100644
index 000000000000..edf7eb895ad2
--- /dev/null
+++ b/lib/Target/IA64/IA64ISelLowering.h
@@ -0,0 +1,76 @@
+//===-- IA64ISelLowering.h - IA64 DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that IA64 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IA64_IA64ISELLOWERING_H
+#define LLVM_TARGET_IA64_IA64ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "IA64.h"
+
+namespace llvm {
+ namespace IA64ISD {
+ enum NodeType {
+ // Start the numbering where the built-in ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// GETFD - the getf.d instruction takes a floating point operand and
+ /// returns its 64-bit memory representation as an i64
+ GETFD,
+
+ // TODO: explain this hack
+ BRCALL,
+
+ // RET_FLAG - Return with a flag operand
+ RET_FLAG
+ };
+ }
+
+ class IA64TargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ //int ReturnAddrIndex; // FrameIndex for return slot.
+ unsigned GP, SP, RP; // FIXME - clean this mess up
+ public:
+ explicit IA64TargetLowering(TargetMachine &TM);
+
+ unsigned VirtGPR; // this is public so it can be accessed in the selector
+ // for ISD::RET. add an accessor instead? FIXME
+ const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType: return ISD::SETCC's result type.
+ virtual MVT getSetCCResultType(MVT VT) const;
+
+ /// LowerArguments - This hook must be implemented to indicate how we should
+ /// lower the arguments for the specified function, into the specified DAG.
+ virtual void LowerArguments(Function &F, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &ArgValues,
+ DebugLoc dl);
+
+ /// LowerCallTo - This hook lowers an abstract call to a function into an
+ /// actual call.
+ virtual std::pair<SDValue, SDValue>
+ LowerCallTo(SDValue Chain, const Type *RetTy,
+ bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg,
+ unsigned CC, bool isTailCall,
+ SDValue Callee, ArgListTy &Args, SelectionDAG &DAG,
+ DebugLoc dl);
+
+ /// LowerOperation - provide custom lowering for specific ops
+ /// (currently RET, VAARG and VASTART)
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ };
+}
+
+#endif // LLVM_TARGET_IA64_IA64ISELLOWERING_H
diff --git a/lib/Target/IA64/IA64InstrBuilder.h b/lib/Target/IA64/IA64InstrBuilder.h
new file mode 100644
index 000000000000..a5d4dca530fb
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrBuilder.h
@@ -0,0 +1,40 @@
+//===-- IA64InstrBuilder.h - Aids for building IA64 insts -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64_INSTRBUILDER_H
+#define IA64_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. The
+/// reference uses the abstract FrameIndex as its base register until it is
+/// resolved; a constant offset may be specified as well.
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
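+
+// Minimal usage sketch (SomeMemOpcode and the surrounding variables are
+// placeholders, not definitions from this backend):
+//   addFrameReference(BuildMI(MBB, MI, DL, TII.get(SomeMemOpcode), DestReg),
+//                     FrameIdx);                   // adds Imm(0), FrameIndex(FI)
+//   addFrameReference(MIB, FrameIdx, /*Offset=*/0, /*mem=*/false);
+//                                                  // adds FrameIndex(FI), Imm(0)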
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64InstrFormats.td b/lib/Target/IA64/IA64InstrFormats.td
new file mode 100644
index 000000000000..c465880d3e1a
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrFormats.td
@@ -0,0 +1,80 @@
+//===- IA64InstrFormats.td - IA64 Instruction Formats -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// - Warning: the formats in here aren't really being used yet, so they are
+// mostly junk. They'll get fixed up as the JIT gets built.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+class InstIA64<bits<4> op, dag OOL, dag IOL, string asmstr> : Instruction {
+ // IA64 instruction baseline
+ field bits<41> Inst;
+ let Namespace = "IA64";
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+
+ let Inst{40-37} = op;
+}
+
+//"Each Itanium instruction is categorized into one of six types."
+//We should have:
+// A, I, M, F, B, L+X
+
+class AForm<bits<4> opcode, bits<6> qpReg, dag OOL, dag IOL, string asmstr> :
+ InstIA64<opcode, OOL, IOL, asmstr> {
+
+ let Inst{5-0} = qpReg;
+}
+
+class AForm_DAG<bits<4> opcode, bits<6> qpReg, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern> :
+ InstIA64<opcode, OOL, IOL, asmstr> {
+
+ let Pattern = pattern;
+ let Inst{5-0} = qpReg;
+}
+
+let isBranch = 1, isTerminator = 1 in
+class BForm<bits<4> opcode, bits<6> x6, bits<3> btype, dag OOL, dag IOL, string asmstr> :
+ InstIA64<opcode, OOL, IOL, asmstr> {
+
+ let Inst{32-27} = x6;
+ let Inst{8-6} = btype;
+}
+
+class MForm<bits<4> opcode, bits<6> x6, dag OOL, dag IOL, string asmstr> :
+ InstIA64<opcode, OOL, IOL, asmstr> {
+ bits<7> Ra;
+ bits<7> Rb;
+ bits<16> disp;
+
+ let Inst{35-30} = x6;
+// let Inst{20-16} = Rb;
+ let Inst{15-0} = disp;
+}
+
+class RawForm<bits<4> opcode, bits<26> rest, dag OOL, dag IOL, string asmstr> :
+ InstIA64<opcode, OOL, IOL, asmstr> {
+ let Inst{25-0} = rest;
+}
+
+// Pseudo instructions.
+class PseudoInstIA64<dag OOL, dag IOL, string nm> : InstIA64<0, OOL, IOL, nm> {
+}
+
+class PseudoInstIA64_DAG<dag OOL, dag IOL, string nm, list<dag> pattern>
+ : InstIA64<0, OOL, IOL, nm> {
+ let Pattern = pattern;
+}
+
diff --git a/lib/Target/IA64/IA64InstrInfo.cpp b/lib/Target/IA64/IA64InstrInfo.cpp
new file mode 100644
index 000000000000..5f89d4f13994
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.cpp
@@ -0,0 +1,193 @@
+//===- IA64InstrInfo.cpp - IA64 Instruction Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64InstrInfo.h"
+#include "IA64.h"
+#include "IA64InstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/SmallVector.h"
+#include "IA64GenInstrInfo.inc"
+using namespace llvm;
+
+IA64InstrInfo::IA64InstrInfo()
+ : TargetInstrInfoImpl(IA64Insts, sizeof(IA64Insts)/sizeof(IA64Insts[0])),
+ RI(*this) {
+}
+
+
+bool IA64InstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg,
+ unsigned& destReg,
+ unsigned& SrcSR, unsigned& DstSR) const {
+ SrcSR = DstSR = 0; // No sub-registers.
+
+ unsigned oc = MI.getOpcode();
+ if (oc == IA64::MOV || oc == IA64::FMOV) {
+ // TODO: this doesn't detect predicate moves
+ assert(MI.getNumOperands() >= 2 &&
+ /* MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() && */
+ "invalid register-register move instruction");
+ if (MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg()) {
+ // if both operands of the MOV/FMOV are registers, then
+ // yes, this is a move instruction
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ }
+ return false; // we don't consider e.g. %regN = MOV <FrameIndex #x> a
+ // move instruction
+}
+
+unsigned
+IA64InstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond)const {
+ // FIXME this should probably have a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Can only insert uncond branches so far.
+ assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
+ BuildMI(&MBB, dl, get(IA64::BRL_NOTCALL)).addMBB(TBB);
+ return 1;
+}
+
+bool IA64InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ if (DestRC != SrcRC) {
+ // Not yet supported!
+ return false;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (DestRC == IA64::PRRegisterClass) // if a bool, we use pseudocode
+ // (SrcReg) DestReg = cmp.eq.unc(r0, r0)
+ BuildMI(MBB, MI, DL, get(IA64::PCMPEQUNC), DestReg)
+ .addReg(IA64::r0).addReg(IA64::r0).addReg(SrcReg);
+ else // otherwise, MOV works (for both gen. regs and FP regs)
+ BuildMI(MBB, MI, DL, get(IA64::MOV), DestReg).addReg(SrcReg);
+
+ return true;
+}
+
+void IA64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC) const{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == IA64::FPRegisterClass) {
+ BuildMI(MBB, MI, DL, get(IA64::STF_SPILL)).addFrameIndex(FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+ } else if (RC == IA64::GRRegisterClass) {
+ BuildMI(MBB, MI, DL, get(IA64::ST8)).addFrameIndex(FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+ } else if (RC == IA64::PRRegisterClass) {
+ /* we use IA64::r2 as a temporary register for doing this hackery. */
+ // first we load 0:
+ BuildMI(MBB, MI, DL, get(IA64::MOV), IA64::r2).addReg(IA64::r0);
+ // then conditionally add 1:
+ BuildMI(MBB, MI, DL, get(IA64::CADDIMM22), IA64::r2).addReg(IA64::r2)
+ .addImm(1).addReg(SrcReg, getKillRegState(isKill));
+ // and then store it to the stack
+ BuildMI(MBB, MI, DL, get(IA64::ST8))
+ .addFrameIndex(FrameIdx)
+ .addReg(IA64::r2);
+ } else assert(0 &&
+ "sorry, I don't know how to store this sort of reg in the stack\n");
+}
+
+void IA64InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc = 0;
+ if (RC == IA64::FPRegisterClass) {
+ Opc = IA64::STF8;
+ } else if (RC == IA64::GRRegisterClass) {
+ Opc = IA64::ST8;
+ } else if (RC == IA64::PRRegisterClass) {
+ Opc = IA64::ST1;
+ } else {
+ assert(0 &&
+ "sorry, I don't know how to store this sort of reg\n");
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ MIB.addReg(SrcReg, getKillRegState(isKill));
+ NewMIs.push_back(MIB);
+}
+
+void IA64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC)const{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == IA64::FPRegisterClass) {
+ BuildMI(MBB, MI, DL, get(IA64::LDF_FILL), DestReg).addFrameIndex(FrameIdx);
+ } else if (RC == IA64::GRRegisterClass) {
+ BuildMI(MBB, MI, DL, get(IA64::LD8), DestReg).addFrameIndex(FrameIdx);
+ } else if (RC == IA64::PRRegisterClass) {
+ // first we load a byte from the stack into r2, our 'predicate hackery'
+ // scratch reg
+ BuildMI(MBB, MI, DL, get(IA64::LD8), IA64::r2).addFrameIndex(FrameIdx);
+ // then we compare it to zero. If it _is_ zero, compare-not-equal to
+ // r0 gives us 0, which is what we want, so that's nice.
+ BuildMI(MBB, MI, DL, get(IA64::CMPNE), DestReg)
+ .addReg(IA64::r2)
+ .addReg(IA64::r0);
+ } else {
+ assert(0 &&
+ "sorry, I don't know how to load this sort of reg from the stack\n");
+ }
+}
+
+void IA64InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc = 0;
+ if (RC == IA64::FPRegisterClass) {
+ Opc = IA64::LDF8;
+ } else if (RC == IA64::GRRegisterClass) {
+ Opc = IA64::LD8;
+ } else if (RC == IA64::PRRegisterClass) {
+ Opc = IA64::LD1;
+ } else {
+ assert(0 &&
+ "sorry, I don't know how to load this sort of reg\n");
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+}
diff --git a/lib/Target/IA64/IA64InstrInfo.h b/lib/Target/IA64/IA64InstrInfo.h
new file mode 100644
index 000000000000..79236c2c7c86
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.h
@@ -0,0 +1,70 @@
+//===- IA64InstrInfo.h - IA64 Instruction Information ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64INSTRUCTIONINFO_H
+#define IA64INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "IA64RegisterInfo.h"
+
+namespace llvm {
+
+class IA64InstrInfo : public TargetInstrInfoImpl {
+ const IA64RegisterInfo RI;
+public:
+ IA64InstrInfo();
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const IA64RegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+};
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64InstrInfo.td b/lib/Target/IA64/IA64InstrInfo.td
new file mode 100644
index 000000000000..2ab9897bddeb
--- /dev/null
+++ b/lib/Target/IA64/IA64InstrInfo.td
@@ -0,0 +1,751 @@
+//===- IA64InstrInfo.td - Describe the IA64 Instruction Set -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the IA64 instruction set, defining the instructions, and
+// properties of the instructions which are needed for code generation, machine
+// code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+include "IA64InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// IA-64 specific DAG Nodes.
+//
+
+def IA64getfd : SDNode<"IA64ISD::GETFD", SDTFPToIntOp, []>;
+
+def retflag : SDNode<"IA64ISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+//===---------
+// Instruction types
+
+class isA { bit A=1; } // I or M unit
+class isM { bit M=1; } // M unit
+class isI { bit I=1; } // I unit
+class isB { bit B=1; } // B unit
+class isF { bit F=1; } // F unit
+class isLX { bit LX=1; } // I/B
+
+//===---------
+
+def u2imm : Operand<i8>;
+def u6imm : Operand<i8>;
+def s8imm : Operand<i8> {
+ let PrintMethod = "printS8ImmOperand";
+}
+def s14imm : Operand<i64> {
+ let PrintMethod = "printS14ImmOperand";
+}
+def s22imm : Operand<i64> {
+ let PrintMethod = "printS22ImmOperand";
+}
+def u64imm : Operand<i64> {
+ let PrintMethod = "printU64ImmOperand";
+}
+def s64imm : Operand<i64> {
+ let PrintMethod = "printS64ImmOperand";
+}
+
+let PrintMethod = "printGlobalOperand" in
+ def globaladdress : Operand<i64>;
+
+// the asmprinter needs to know about calls
+let PrintMethod = "printCallOperand" in
+ def calltarget : Operand<i64>;
+
+/* new daggy action!!! */
+
+def is32ones : PatLeaf<(i64 imm), [{
+ // is32ones predicate - True if the immediate is 0x00000000FFFFFFFF
+ // Used to create ZXT4s appropriately
+ uint64_t v = (uint64_t)N->getZExtValue();
+ return (v == 0x00000000FFFFFFFFLL);
+}]>;
+
+// isMIXable predicates - True if the immediate is
+// 0xFF00FF00FF00FF00, 0x00FF00FF00FF00FF
+// etc, through 0x00000000FFFFFFFF
+// Used to test for the suitability of mix*
+def isMIX1Lable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getZExtValue()==0xFF00FF00FF00FF00LL);
+}]>;
+def isMIX1Rable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getZExtValue()==0x00FF00FF00FF00FFLL);
+}]>;
+def isMIX2Lable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getZExtValue()==0xFFFF0000FFFF0000LL);
+}]>;
+def isMIX2Rable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getZExtValue()==0x0000FFFF0000FFFFLL);
+}]>;
+def isMIX4Lable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getZExtValue()==0xFFFFFFFF00000000LL);
+}]>;
+def isMIX4Rable: PatLeaf<(i64 imm), [{
+ return((uint64_t)N->getZExtValue()==0x00000000FFFFFFFFLL);
+}]>;
+
+def isSHLADDimm: PatLeaf<(i64 imm), [{
+ // isSHLADDimm predicate - True if the immediate is exactly 1, 2, 3 or 4
+ // - 0 is *not* okay.
+ // Used to create shladd instructions appropriately
+ int64_t v = (int64_t)N->getZExtValue();
+ return (v >= 1 && v <= 4);
+}]>;
+
+def immSExt14 : PatLeaf<(i64 imm), [{
+ // immSExt14 predicate - True if the immediate fits in a 14-bit sign extended
+ // field. Used by instructions like 'adds'.
+ int64_t v = (int64_t)N->getZExtValue();
+ return (v <= 8191 && v >= -8192);
+}]>;
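+
+// For reference: a 14-bit sign-extended field covers [-2^13, 2^13-1], i.e.
+// exactly the [-8192, 8191] range tested above.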
+
+// imm64 predicate - True if the immediate fits in a 64-bit
+// field - i.e., always true. Used to keep movl happy.
+def imm64 : PatLeaf<(i64 imm)>;
+
+def ADD : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "add $dst = $src1, $src2",
+ [(set GR:$dst, (add GR:$src1, GR:$src2))]>, isA;
+
+def ADD1 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "add $dst = $src1, $src2, 1",
+ [(set GR:$dst, (add (add GR:$src1, GR:$src2), 1))]>, isA;
+
+def ADDS : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s14imm:$imm),
+ "adds $dst = $imm, $src1",
+ [(set GR:$dst, (add GR:$src1, immSExt14:$imm))]>, isA;
+
+def MOVL : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins s64imm:$imm),
+ "movl $dst = $imm",
+ [(set GR:$dst, imm64:$imm)]>, isLX;
+
+def ADDL_GA : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, globaladdress:$imm),
+ "addl $dst = $imm, $src1",
+ []>, isA;
+
+// hmm
+def ADDL_EA : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, calltarget:$imm),
+ "addl $dst = $imm, $src1",
+ []>, isA;
+
+def SUB : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "sub $dst = $src1, $src2",
+ [(set GR:$dst, (sub GR:$src1, GR:$src2))]>, isA;
+
+def SUB1 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "sub $dst = $src1, $src2, 1",
+ [(set GR:$dst, (add (sub GR: $src1, GR:$src2), -1))]>, isA;
+
+let isTwoAddress = 1 in {
+def TPCADDIMM22 : AForm<0x03, 0x0b,
+ (outs GR:$dst), (ins GR:$src1, s22imm:$imm, PR:$qp),
+ "($qp) add $dst = $imm, $dst">, isA;
+def TPCADDS : AForm_DAG<0x03, 0x0b,
+ (outs GR:$dst), (ins GR:$src1, s14imm:$imm, PR:$qp),
+ "($qp) adds $dst = $imm, $dst",
+ []>, isA;
+def TPCMPIMM8NE : AForm<0x03, 0x0b,
+ (outs PR:$dst), (ins PR:$src1, s22imm:$imm, GR:$src2, PR:$qp),
+ "($qp) cmp.ne $dst , p0 = $imm, $src2">, isA;
+}
+
+// zero extend a bool (predicate reg) into an integer reg
+def ZXTb : Pat<(zext PR:$src),
+ (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src)>;
+def AXTb : Pat<(anyext PR:$src),
+ (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src)>;
+
+// normal sign/zero-extends
+def SXT1 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "sxt1 $dst = $src",
+ [(set GR:$dst, (sext_inreg GR:$src, i8))]>, isI;
+def ZXT1 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "zxt1 $dst = $src",
+ [(set GR:$dst, (and GR:$src, 255))]>, isI;
+def SXT2 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "sxt2 $dst = $src",
+ [(set GR:$dst, (sext_inreg GR:$src, i16))]>, isI;
+def ZXT2 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "zxt2 $dst = $src",
+ [(set GR:$dst, (and GR:$src, 65535))]>, isI;
+def SXT4 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "sxt4 $dst = $src",
+ [(set GR:$dst, (sext_inreg GR:$src, i32))]>, isI;
+def ZXT4 : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "zxt4 $dst = $src",
+ [(set GR:$dst, (and GR:$src, is32ones))]>, isI;
+
+// fixme: shrs vs shru?
+def MIX1L : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "mix1.l $dst = $src1, $src2",
+ [(set GR:$dst, (or (and GR:$src1, isMIX1Lable),
+ (and (srl GR:$src2, (i64 8)), isMIX1Lable)))]>, isI;
+
+def MIX2L : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "mix2.l $dst = $src1, $src2",
+ [(set GR:$dst, (or (and GR:$src1, isMIX2Lable),
+ (and (srl GR:$src2, (i64 16)), isMIX2Lable)))]>, isI;
+
+def MIX4L : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "mix4.l $dst = $src1, $src2",
+ [(set GR:$dst, (or (and GR:$src1, isMIX4Lable),
+ (and (srl GR:$src2, (i64 32)), isMIX4Lable)))]>, isI;
+
+def MIX1R : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "mix1.r $dst = $src1, $src2",
+ [(set GR:$dst, (or (and (shl GR:$src1, (i64 8)), isMIX1Rable),
+ (and GR:$src2, isMIX1Rable)))]>, isI;
+
+def MIX2R : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "mix2.r $dst = $src1, $src2",
+ [(set GR:$dst, (or (and (shl GR:$src1, (i64 16)), isMIX2Rable),
+ (and GR:$src2, isMIX2Rable)))]>, isI;
+
+def MIX4R : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "mix4.r $dst = $src1, $src2",
+ [(set GR:$dst, (or (and (shl GR:$src1, (i64 32)), isMIX4Rable),
+ (and GR:$src2, isMIX4Rable)))]>, isI;
+
+def GETFSIGD : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins FP:$src),
+ "getf.sig $dst = $src",
+ []>, isM;
+
+def SETFSIGD : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins GR:$src),
+ "setf.sig $dst = $src",
+ []>, isM;
+
+def XMALD : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "xma.l $dst = $src1, $src2, $src3",
+ []>, isF;
+def XMAHD : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "xma.h $dst = $src1, $src2, $src3",
+ []>, isF;
+def XMAHUD : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "xma.hu $dst = $src1, $src2, $src3",
+ []>, isF;
+
+// pseudocode for integer multiplication
+def : Pat<(mul GR:$src1, GR:$src2),
+ (GETFSIGD (XMALD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+def : Pat<(mulhs GR:$src1, GR:$src2),
+ (GETFSIGD (XMAHD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+def : Pat<(mulhu GR:$src1, GR:$src2),
+ (GETFSIGD (XMAHUD (SETFSIGD GR:$src1), (SETFSIGD GR:$src2), F0))>;
+
+// TODO: addp4 (addp4 dst = src, r0 is a 32-bit add)
+// has imm form, too
+
+// def ADDS : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s14imm:$imm),
+// "adds $dst = $imm, $src1">;
+
+def AND : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "and $dst = $src1, $src2",
+ [(set GR:$dst, (and GR:$src1, GR:$src2))]>, isA;
+def ANDCM : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "andcm $dst = $src1, $src2",
+ [(set GR:$dst, (and GR:$src1, (not GR:$src2)))]>, isA;
+// TODO: and/andcm/or/xor/add/sub/shift immediate forms
+def OR : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "or $dst = $src1, $src2",
+ [(set GR:$dst, (or GR:$src1, GR:$src2))]>, isA;
+
+def pOR : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2, PR:$qp),
+ "($qp) or $dst = $src1, $src2">, isA;
+
+// the following are all a bit unfortunate: we throw away the complement
+// of the compare!
+def CMPEQ : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.eq $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (seteq GR:$src1, GR:$src2))]>, isA;
+def CMPGT : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.gt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setgt GR:$src1, GR:$src2))]>, isA;
+def CMPGE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.ge $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setge GR:$src1, GR:$src2))]>, isA;
+def CMPLT : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.lt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setlt GR:$src1, GR:$src2))]>, isA;
+def CMPLE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.le $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setle GR:$src1, GR:$src2))]>, isA;
+def CMPNE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.ne $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setne GR:$src1, GR:$src2))]>, isA;
+def CMPLTU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.ltu $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setult GR:$src1, GR:$src2))]>, isA;
+def CMPGTU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.gtu $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setugt GR:$src1, GR:$src2))]>, isA;
+def CMPLEU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.leu $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setule GR:$src1, GR:$src2))]>, isA;
+def CMPGEU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2),
+ "cmp.geu $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setuge GR:$src1, GR:$src2))]>, isA;
+
+// and we do the whole thing again for FP compares!
+def FCMPEQ : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.eq $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (seteq FP:$src1, FP:$src2))]>, isF;
+def FCMPGT : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.gt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setgt FP:$src1, FP:$src2))]>, isF;
+def FCMPGE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.ge $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setge FP:$src1, FP:$src2))]>, isF;
+def FCMPLT : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.lt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setlt FP:$src1, FP:$src2))]>, isF;
+def FCMPLE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.le $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setle FP:$src1, FP:$src2))]>, isF;
+def FCMPNE : AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.neq $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setne FP:$src1, FP:$src2))]>, isF;
+def FCMPLTU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.lt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setult FP:$src1, FP:$src2))]>, isF;
+def FCMPGTU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.gt $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setugt FP:$src1, FP:$src2))]>, isF;
+def FCMPLEU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.le $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setule FP:$src1, FP:$src2))]>, isF;
+def FCMPGEU: AForm_DAG<0x03, 0x0b, (outs PR:$dst), (ins FP:$src1, FP:$src2),
+ "fcmp.ge $dst, p0 = $src1, $src2",
+ [(set PR:$dst, (setuge FP:$src1, FP:$src2))]>, isF;
+
+def PCMPEQUNCR0R0 : AForm<0x03, 0x0b, (outs PR:$dst), (ins PR:$qp),
+ "($qp) cmp.eq.unc $dst, p0 = r0, r0">, isA;
+
+def : Pat<(trunc GR:$src), // truncate i64 to i1
+ (CMPNE GR:$src, r0)>; // $src!=0? If so, PR:$dst=true
+
+let isTwoAddress=1 in {
+ def TPCMPEQR0R0 : AForm<0x03, 0x0b, (outs PR:$dst), (ins PR:$bogus, PR:$qp),
+ "($qp) cmp.eq $dst, p0 = r0, r0">, isA;
+ def TPCMPNER0R0 : AForm<0x03, 0x0b, (outs PR:$dst), (ins PR:$bogus, PR:$qp),
+ "($qp) cmp.ne $dst, p0 = r0, r0">, isA;
+}
+
+/* our pseudocode for OR on predicates is:
+pC = pA OR pB
+-------------
+(pA) cmp.eq.unc pC,p0 = r0,r0 // pC = pA
+ ;;
+(pB) cmp.eq pC,p0 = r0,r0 // if (pB) pC = 1 */
+
+def bOR : Pat<(or PR:$src1, PR:$src2),
+ (TPCMPEQR0R0 (PCMPEQUNCR0R0 PR:$src1), PR:$src2)>;
+
+/* our pseudocode for AND on predicates is:
+ *
+(pA) cmp.eq.unc pC,p0 = r0,r0 // pC = pA
+ cmp.eq pTemp,p0 = r0,r0 // pTemp = NOT pB
+ ;;
+(pB) cmp.ne pTemp,p0 = r0,r0
+ ;;
+(pTemp)cmp.ne pC,p0 = r0,r0 // if (NOT pB) pC = 0 */
+
+def bAND : Pat<(and PR:$src1, PR:$src2),
+ ( TPCMPNER0R0 (PCMPEQUNCR0R0 PR:$src1),
+ (TPCMPNER0R0 (CMPEQ r0, r0), PR:$src2) )>;
+
+/* one possible routine for XOR on predicates is:
+
+ // Compute px = py ^ pz
+ // using sum of products: px = (py & !pz) | (pz & !py)
+ // Uses 5 instructions in 3 cycles.
+ // cycle 1
+(pz) cmp.eq.unc px = r0, r0 // px = pz
+(py) cmp.eq.unc pt = r0, r0 // pt = py
+ ;;
+ // cycle 2
+(pt) cmp.ne.and px = r0, r0 // px = px & !pt (px = pz & !pt)
+(pz) cmp.ne.and pt = r0, r0 // pt = pt & !pz
+ ;;
+ } { .mmi
+ // cycle 3
+(pt) cmp.eq.or px = r0, r0 // px = px | pt
+
+*** Another, which we use here, requires one scratch GR. it is:
+
+ mov rt = 0 // initialize rt off critical path
+ ;;
+
+ // cycle 1
+(pz) cmp.eq.unc px = r0, r0 // px = pz
+(pz) mov rt = 1 // rt = pz
+ ;;
+ // cycle 2
+(py) cmp.ne px = 1, rt // if (py) px = !pz
+
+.. these routines kindly provided by Jim Hull
+*/
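+
+// The bXOR pattern below follows the second (scratch-GR) routine above:
+// TPCADDS materializes rt (1 if pz, else 0) and TPCMPIMM8NE performs the
+// final "(py) cmp.ne px = 1, rt" step.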
+
+def bXOR : Pat<(xor PR:$src1, PR:$src2),
+ (TPCMPIMM8NE (PCMPEQUNCR0R0 PR:$src2), 1,
+ (TPCADDS (ADDS r0, 0), 1, PR:$src2),
+ PR:$src1)>;
+
+def XOR : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "xor $dst = $src1, $src2",
+ [(set GR:$dst, (xor GR:$src1, GR:$src2))]>, isA;
+
+def SHLADD: AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1,s64imm:$imm,GR:$src2),
+ "shladd $dst = $src1, $imm, $src2",
+ [(set GR:$dst, (add GR:$src2, (shl GR:$src1, isSHLADDimm:$imm)))]>, isA;
+
+def SHL : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "shl $dst = $src1, $src2",
+ [(set GR:$dst, (shl GR:$src1, GR:$src2))]>, isI;
+
+def SHRU : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "shr.u $dst = $src1, $src2",
+ [(set GR:$dst, (srl GR:$src1, GR:$src2))]>, isI;
+
+def SHRS : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, GR:$src2),
+ "shr $dst = $src1, $src2",
+ [(set GR:$dst, (sra GR:$src1, GR:$src2))]>, isI;
+
+def MOV : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src), "mov $dst = $src">, isA;
+def FMOV : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "mov $dst = $src">, isF; // XXX: there _is_ no fmov
+def PMOV : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src, PR:$qp),
+ "($qp) mov $dst = $src">, isA;
+
+def SPILL_ALL_PREDICATES_TO_GR : AForm<0x03, 0x0b, (outs GR:$dst), (ins),
+ "mov $dst = pr">, isI;
+def FILL_ALL_PREDICATES_FROM_GR : AForm<0x03, 0x0b, (outs), (ins GR:$src),
+ "mov pr = $src">, isI;
+
+let isTwoAddress = 1 in {
+ def CMOV : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src2, GR:$src, PR:$qp),
+ "($qp) mov $dst = $src">, isA;
+}
+
+def PFMOV : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src, PR:$qp),
+ "($qp) mov $dst = $src">, isF;
+
+let isTwoAddress = 1 in {
+ def CFMOV : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src2, FP:$src, PR:$qp),
+ "($qp) mov $dst = $src">, isF;
+}
+
+def SELECTINT : Pat<(select PR:$which, GR:$src1, GR:$src2),
+ (CMOV (MOV GR:$src2), GR:$src1, PR:$which)>; // note order!
+def SELECTFP : Pat<(select PR:$which, FP:$src1, FP:$src2),
+ (CFMOV (FMOV FP:$src2), FP:$src1, PR:$which)>; // note order!
+// TODO: can do this faster, w/o using any integer regs (see pattern isel)
+def SELECTBOOL : Pat<(select PR:$which, PR:$src1, PR:$src2), // note order!
+ (CMPNE (CMOV
+ (MOV (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src2)),
+ (TPCADDIMM22 (ADDS r0, 0), 1, PR:$src1), PR:$which), r0)>;
+
+// load constants of various sizes // FIXME: prettyprint -ve constants
+def : Pat<(i64 immSExt14:$imm), (ADDS r0, immSExt14:$imm)>;
+def : Pat<(i1 -1), (CMPEQ r0, r0)>; // TODO: this should just be a ref to p0
+def : Pat<(i1 0), (CMPNE r0, r0)>; // TODO: any instruction actually *using*
+ // this predicate should be killed!
+
+// TODO: support postincrement (reg, imm9) loads+stores - this needs more
+// tablegen support
+
+def IUSE : PseudoInstIA64<(outs), (ins variable_ops), "// IUSE">;
+def ADJUSTCALLSTACKUP : PseudoInstIA64<(outs), (ins variable_ops),
+ "// ADJUSTCALLSTACKUP">;
+def ADJUSTCALLSTACKDOWN : PseudoInstIA64<(outs), (ins variable_ops),
+ "// ADJUSTCALLSTACKDOWN">;
+def PSEUDO_ALLOC : PseudoInstIA64<(outs), (ins GR:$foo), "// PSEUDO_ALLOC">;
+
+def ALLOC : AForm<0x03, 0x0b,
+ (outs GR:$dst), (ins i8imm:$inputs, i8imm:$locals, i8imm:$outputs, i8imm:$rotating),
+ "alloc $dst = ar.pfs,$inputs,$locals,$outputs,$rotating">, isM;
+
+let isTwoAddress = 1 in {
+ def TCMPNE : AForm<0x03, 0x0b,
+ (outs PR:$dst), (ins PR:$src2, GR:$src3, GR:$src4),
+ "cmp.ne $dst, p0 = $src3, $src4">, isA;
+
+ def TPCMPEQOR : AForm<0x03, 0x0b,
+ (outs PR:$dst), (ins PR:$src2, GR:$src3, GR:$src4, PR:$qp),
+ "($qp) cmp.eq.or $dst, p0 = $src3, $src4">, isA;
+
+ def TPCMPNE : AForm<0x03, 0x0b,
+ (outs PR:$dst), (ins PR:$src2, GR:$src3, GR:$src4, PR:$qp),
+ "($qp) cmp.ne $dst, p0 = $src3, $src4">, isA;
+
+ def TPCMPEQ : AForm<0x03, 0x0b,
+ (outs PR:$dst), (ins PR:$src2, GR:$src3, GR:$src4, PR:$qp),
+ "($qp) cmp.eq $dst, p0 = $src3, $src4">, isA;
+}
+
+def MOVSIMM14 : AForm<0x03, 0x0b, (outs GR:$dst), (ins s14imm:$imm),
+ "mov $dst = $imm">, isA;
+def MOVSIMM22 : AForm<0x03, 0x0b, (outs GR:$dst), (ins s22imm:$imm),
+ "mov $dst = $imm">, isA;
+def MOVLIMM64 : AForm<0x03, 0x0b, (outs GR:$dst), (ins s64imm:$imm),
+ "movl $dst = $imm">, isLX;
+
+def SHLI : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, u6imm:$imm),
+ "shl $dst = $src1, $imm">, isI;
+def SHRUI : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, u6imm:$imm),
+ "shr.u $dst = $src1, $imm">, isI;
+def SHRSI : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, u6imm:$imm),
+ "shr $dst = $src1, $imm">, isI;
+
+def EXTRU : AForm<0x03, 0x0b,
+ (outs GR:$dst), (ins GR:$src1, u6imm:$imm1, u6imm:$imm2),
+ "extr.u $dst = $src1, $imm1, $imm2">, isI;
+
+def DEPZ : AForm<0x03, 0x0b,
+ (outs GR:$dst), (ins GR:$src1, u6imm:$imm1, u6imm:$imm2),
+ "dep.z $dst = $src1, $imm1, $imm2">, isI;
+
+def PCMPEQOR : AForm<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2, PR:$qp),
+ "($qp) cmp.eq.or $dst, p0 = $src1, $src2">, isA;
+def PCMPEQUNC : AForm<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2, PR:$qp),
+ "($qp) cmp.eq.unc $dst, p0 = $src1, $src2">, isA;
+def PCMPNE : AForm<0x03, 0x0b, (outs PR:$dst), (ins GR:$src1, GR:$src2, PR:$qp),
+ "($qp) cmp.ne $dst, p0 = $src1, $src2">, isA;
+
+// two destinations!
+def BCMPEQ : AForm<0x03, 0x0b, (outs PR:$dst1, PR:$dst2), (ins GR:$src1, GR:$src2),
+ "cmp.eq $dst1, dst2 = $src1, $src2">, isA;
+
+def ADDIMM14 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s14imm:$imm),
+ "adds $dst = $imm, $src1">, isA;
+
+def ADDIMM22 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s22imm:$imm),
+ "add $dst = $imm, $src1">, isA;
+def CADDIMM22 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$src1, s22imm:$imm, PR:$qp),
+ "($qp) add $dst = $imm, $src1">, isA;
+
+def SUBIMM8 : AForm<0x03, 0x0b, (outs GR:$dst), (ins s8imm:$imm, GR:$src2),
+ "sub $dst = $imm, $src2">, isA;
+
+let mayStore = 1 in {
+ def ST1 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, GR:$value),
+ "st1 [$dstPtr] = $value">, isM;
+ def ST2 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, GR:$value),
+ "st2 [$dstPtr] = $value">, isM;
+ def ST4 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, GR:$value),
+ "st4 [$dstPtr] = $value">, isM;
+ def ST8 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, GR:$value),
+ "st8 [$dstPtr] = $value">, isM;
+ def STF4 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, FP:$value),
+ "stfs [$dstPtr] = $value">, isM;
+ def STF8 : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, FP:$value),
+ "stfd [$dstPtr] = $value">, isM;
+ def STF_SPILL : AForm<0x03, 0x0b, (outs), (ins GR:$dstPtr, FP:$value),
+ "stf.spill [$dstPtr] = $value">, isM;
+}
+
+let canFoldAsLoad = 1 in {
+ def LD1 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$srcPtr),
+ "ld1 $dst = [$srcPtr]">, isM;
+ def LD2 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$srcPtr),
+ "ld2 $dst = [$srcPtr]">, isM;
+ def LD4 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$srcPtr),
+ "ld4 $dst = [$srcPtr]">, isM;
+ def LD8 : AForm<0x03, 0x0b, (outs GR:$dst), (ins GR:$srcPtr),
+ "ld8 $dst = [$srcPtr]">, isM;
+ def LDF4 : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$srcPtr),
+ "ldfs $dst = [$srcPtr]">, isM;
+ def LDF8 : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$srcPtr),
+ "ldfd $dst = [$srcPtr]">, isM;
+ def LDF_FILL : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$srcPtr),
+ "ldf.fill $dst = [$srcPtr]">, isM;
+}
+
+def POPCNT : AForm_DAG<0x03, 0x0b, (outs GR:$dst), (ins GR:$src),
+ "popcnt $dst = $src",
+ [(set GR:$dst, (ctpop GR:$src))]>, isI;
+
+// some FP stuff: // TODO: single-precision stuff?
+def FADD : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2),
+ "fadd $dst = $src1, $src2",
+ [(set FP:$dst, (fadd FP:$src1, FP:$src2))]>, isF;
+def FADDS: AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2),
+ "fadd.s $dst = $src1, $src2">, isF;
+def FSUB : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2),
+ "fsub $dst = $src1, $src2",
+ [(set FP:$dst, (fsub FP:$src1, FP:$src2))]>, isF;
+def FMPY : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2),
+ "fmpy $dst = $src1, $src2",
+ [(set FP:$dst, (fmul FP:$src1, FP:$src2))]>, isF;
+def FMA : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "fma $dst = $src1, $src2, $src3",
+ [(set FP:$dst, (fadd (fmul FP:$src1, FP:$src2), FP:$src3))]>, isF;
+def FMS : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "fms $dst = $src1, $src2, $src3",
+ [(set FP:$dst, (fsub (fmul FP:$src1, FP:$src2), FP:$src3))]>, isF;
+def FNMA : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "fnma $dst = $src1, $src2, $src3",
+ [(set FP:$dst, (fneg (fadd (fmul FP:$src1, FP:$src2), FP:$src3)))]>, isF;
+def FABS : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fabs $dst = $src",
+ [(set FP:$dst, (fabs FP:$src))]>, isF;
+def FNEG : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fneg $dst = $src",
+ [(set FP:$dst, (fneg FP:$src))]>, isF;
+def FNEGABS : AForm_DAG<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fnegabs $dst = $src",
+ [(set FP:$dst, (fneg (fabs FP:$src)))]>, isF;
+
+let isTwoAddress=1 in {
+def TCFMAS1 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$bogussrc, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.s1 $dst = $src1, $src2, $src3">, isF;
+def TCFMADS0 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$bogussrc, FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.d.s0 $dst = $src1, $src2, $src3">, isF;
+}
+
+def CFMAS1 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.s1 $dst = $src1, $src2, $src3">, isF;
+def CFNMAS1 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fnma.s1 $dst = $src1, $src2, $src3">, isF;
+
+def CFMADS1 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.d.s1 $dst = $src1, $src2, $src3">, isF;
+def CFMADS0 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fma.d.s0 $dst = $src1, $src2, $src3">, isF;
+def CFNMADS1 : AForm<0x03, 0x0b,
+ (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3, PR:$qp),
+ "($qp) fnma.d.s1 $dst = $src1, $src2, $src3">, isF;
+
+def FRCPAS0 : AForm<0x03, 0x0b, (outs FP:$dstFR, PR:$dstPR), (ins FP:$src1, FP:$src2),
+ "frcpa.s0 $dstFR, $dstPR = $src1, $src2">, isF;
+def FRCPAS1 : AForm<0x03, 0x0b, (outs FP:$dstFR, PR:$dstPR), (ins FP:$src1, FP:$src2),
+ "frcpa.s1 $dstFR, $dstPR = $src1, $src2">, isF;
+
+def XMAL : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src1, FP:$src2, FP:$src3),
+ "xma.l $dst = $src1, $src2, $src3">, isF;
+
+def FCVTXF : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.xf $dst = $src">, isF;
+def FCVTXUF : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.xuf $dst = $src">, isF;
+def FCVTXUFS1 : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.xuf.s1 $dst = $src">, isF;
+def FCVTFX : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.fx $dst = $src">, isF;
+def FCVTFXU : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.fxu $dst = $src">, isF;
+
+def FCVTFXTRUNC : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.fx.trunc $dst = $src">, isF;
+def FCVTFXUTRUNC : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.fxu.trunc $dst = $src">, isF;
+
+def FCVTFXTRUNCS1 : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.fx.trunc.s1 $dst = $src">, isF;
+def FCVTFXUTRUNCS1 : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fcvt.fxu.trunc.s1 $dst = $src">, isF;
+
+def FNORMD : AForm<0x03, 0x0b, (outs FP:$dst), (ins FP:$src),
+ "fnorm.d $dst = $src">, isF;
+
+def GETFD : AForm<0x03, 0x0b, (outs GR:$dst), (ins FP:$src),
+ "getf.d $dst = $src">, isM;
+def SETFD : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$src),
+ "setf.d $dst = $src">, isM;
+
+def GETFSIG : AForm<0x03, 0x0b, (outs GR:$dst), (ins FP:$src),
+ "getf.sig $dst = $src">, isM;
+def SETFSIG : AForm<0x03, 0x0b, (outs FP:$dst), (ins GR:$src),
+ "setf.sig $dst = $src">, isM;
+
+// these four FP<->int conversion patterns need checking/cleaning
+def SINT_TO_FP : Pat<(sint_to_fp GR:$src),
+ (FNORMD (FCVTXF (SETFSIG GR:$src)))>;
+def UINT_TO_FP : Pat<(uint_to_fp GR:$src),
+ (FNORMD (FCVTXUF (SETFSIG GR:$src)))>;
+def FP_TO_SINT : Pat<(i64 (fp_to_sint FP:$src)),
+ (GETFSIG (FCVTFXTRUNC FP:$src))>;
+def FP_TO_UINT : Pat<(i64 (fp_to_uint FP:$src)),
+ (GETFSIG (FCVTFXUTRUNC FP:$src))>;
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+def fpimm1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+1.0);
+}]>;
+def fpimmn0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+def fpimmn1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-1.0);
+}]>;
+
+def : Pat<(f64 fpimm0), (FMOV F0)>;
+def : Pat<(f64 fpimm1), (FMOV F1)>;
+def : Pat<(f64 fpimmn0), (FNEG F0)>;
+def : Pat<(f64 fpimmn1), (FNEG F1)>;
+
+let isTerminator = 1, isBranch = 1 in {
+ def BRL_NOTCALL : RawForm<0x03, 0xb0, (outs), (ins i64imm:$dst),
+ "(p0) brl.cond.sptk $dst">, isB;
+ def BRLCOND_NOTCALL : RawForm<0x03, 0xb0, (outs), (ins PR:$qp, i64imm:$dst),
+ "($qp) brl.cond.sptk $dst">, isB;
+ def BRCOND_NOTCALL : RawForm<0x03, 0xb0, (outs), (ins PR:$qp, GR:$dst),
+ "($qp) br.cond.sptk $dst">, isB;
+}
+
+let isCall = 1, /* isTerminator = 1, isBranch = 1, */
+ Uses = [out0,out1,out2,out3,out4,out5,out6,out7],
+// all calls clobber non-callee-saved registers, and for now, they are these:
+ Defs = [r2,r3,r8,r9,r10,r11,r14,r15,r16,r17,r18,r19,r20,r21,r22,r23,r24,
+ r25,r26,r27,r28,r29,r30,r31,
+ p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,
+ F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,
+ F32,F33,F34,F35,F36,F37,F38,F39,F40,F41,F42,F43,F44,F45,F46,F47,F48,F49,
+ F50,F51,F52,F53,F54,F55,F56,
+ F57,F58,F59,F60,F61,F62,F63,F64,F65,F66,F67,F68,F69,F70,F71,F72,F73,F74,
+ F75,F76,F77,F78,F79,F80,F81,
+ F82,F83,F84,F85,F86,F87,F88,F89,F90,F91,F92,F93,F94,F95,F96,F97,F98,F99,
+ F100,F101,F102,F103,F104,F105,
+ F106,F107,F108,F109,F110,F111,F112,F113,F114,F115,F116,F117,F118,F119,
+ F120,F121,F122,F123,F124,F125,F126,F127,
+ out0,out1,out2,out3,out4,out5,out6,out7] in {
+// old pattern call
+ def BRCALL: RawForm<0x03, 0xb0, (outs), (ins calltarget:$dst),
+ "br.call.sptk rp = $dst">, isB; // FIXME: teach llvm about branch regs?
+// new daggy stuff!
+
+// calls a globaladdress
+ def BRCALL_IPREL_GA : RawForm<0x03, 0xb0, (outs), (ins calltarget:$dst),
+ "br.call.sptk rp = $dst">, isB; // FIXME: teach llvm about branch regs?
+// calls an externalsymbol
+ def BRCALL_IPREL_ES : RawForm<0x03, 0xb0, (outs), (ins calltarget:$dst),
+ "br.call.sptk rp = $dst">, isB; // FIXME: teach llvm about branch regs?
+// calls through a function descriptor
+ def BRCALL_INDIRECT : RawForm<0x03, 0xb0, (outs), (ins GR:$branchreg),
+ "br.call.sptk rp = $branchreg">, isB; // FIXME: teach llvm about branch regs?
+ def BRLCOND_CALL : RawForm<0x03, 0xb0, (outs), (ins PR:$qp, i64imm:$dst),
+ "($qp) brl.cond.call.sptk $dst">, isB;
+ def BRCOND_CALL : RawForm<0x03, 0xb0, (outs), (ins PR:$qp, GR:$dst),
+ "($qp) br.cond.call.sptk $dst">, isB;
+}
+
+// Return branch:
+let isTerminator = 1, isReturn = 1 in
+ def RET : AForm_DAG<0x03, 0x0b, (outs), (ins),
+ "br.ret.sptk.many rp",
+ [(retflag)]>, isB; // return
+def : Pat<(ret), (RET)>;
+
+// the evil stop bit of despair
+def STOP : PseudoInstIA64<(outs), (ins variable_ops), ";;">;
+
diff --git a/lib/Target/IA64/IA64MachineFunctionInfo.h b/lib/Target/IA64/IA64MachineFunctionInfo.h
new file mode 100644
index 000000000000..fb930564a9d1
--- /dev/null
+++ b/lib/Target/IA64/IA64MachineFunctionInfo.h
@@ -0,0 +1,34 @@
+//===-- IA64MachineFunctionInfo.h - IA64 machine function info --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares IA64-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64MACHINEFUNCTIONINFO_H
+#define IA64MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+//#include "IA64JITInfo.h"
+
+namespace llvm {
+
+class IA64FunctionInfo : public MachineFunctionInfo {
+
+public:
+ unsigned outRegsUsed; // how many 'out' registers are used
+ // by this machine function? (used to compute the appropriate
+ // entry in the 'alloc' instruction at the top of the
+ // machine function)
+ IA64FunctionInfo(MachineFunction &MF) : outRegsUsed(0) {}
+
+};
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64RegisterInfo.cpp b/lib/Target/IA64/IA64RegisterInfo.cpp
new file mode 100644
index 000000000000..7ad6f51a9b8b
--- /dev/null
+++ b/lib/Target/IA64/IA64RegisterInfo.cpp
@@ -0,0 +1,319 @@
+//===- IA64RegisterInfo.cpp - IA64 Register Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetRegisterInfo class.
+// This file is responsible for the frame pointer elimination optimization
+// on IA64.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64.h"
+#include "IA64RegisterInfo.h"
+#include "IA64InstrBuilder.h"
+#include "IA64MachineFunctionInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+IA64RegisterInfo::IA64RegisterInfo(const TargetInstrInfo &tii)
+ : IA64GenRegisterInfo(IA64::ADJUSTCALLSTACKDOWN, IA64::ADJUSTCALLSTACKUP),
+ TII(tii) {}
+
+const unsigned* IA64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ IA64::r5, 0
+ };
+ return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+IA64RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &IA64::GRRegClass, 0
+ };
+ return CalleeSavedRegClasses;
+}
+
+BitVector IA64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(IA64::r0);
+ Reserved.set(IA64::r1);
+ Reserved.set(IA64::r2);
+ Reserved.set(IA64::r5);
+ Reserved.set(IA64::r12);
+ Reserved.set(IA64::r13);
+ Reserved.set(IA64::r22);
+ Reserved.set(IA64::rp);
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+bool IA64RegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+void IA64RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (hasFP(MF)) {
+ // If we have a frame pointer, turn the adjcallstackup instruction into a
+ // 'sub SP, <amt>' and the adjcallstackdown instruction into 'add SP,
+ // <amt>'
+ MachineInstr *Old = I;
+ unsigned Amount = Old->getOperand(0).getImm();
+ DebugLoc dl = Old->getDebugLoc();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
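+ // (e.g. with a 16-byte stack alignment, Amount = 20 rounds up to
+ // (20+16-1)/16*16 = 32; the integer division truncates before
+ // re-multiplying)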
+
+ // Replace the pseudo instruction with a new instruction...
+ if (Old->getOpcode() == IA64::ADJUSTCALLSTACKDOWN) {
+ BuildMI(MBB, I, dl, TII.get(IA64::ADDIMM22), IA64::r12)
+ .addReg(IA64::r12).addImm(-Amount);
+ } else {
+ assert(Old->getOpcode() == IA64::ADJUSTCALLSTACKUP);
+ BuildMI(MBB, I, dl, TII.get(IA64::ADDIMM22), IA64::r12)
+ .addReg(IA64::r12).addImm(Amount);
+ }
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void IA64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS)const{
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool FP = hasFP(MF);
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Choose a base register: (hasFP ? frame pointer : stack pointer)
+ unsigned BaseRegister = FP ? IA64::r5 : IA64::r12;
+ // Add the base register
+ MI.getOperand(i).ChangeToRegister(BaseRegister, false);
+
+ // Now add the frame object offset to the offset from the base register.
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ // The base register (FP or SP) holds the value the SP had after the stack
+ // size was subtracted from it, so add the stack size to Offset to get the
+ // correct offset relative to it.
+ Offset += MF.getFrameInfo()->getStackSize();
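+ // (Hypothetical example: an object at MFI offset -8 in a 32-byte frame gives
+ // Offset = 24, so below we materialize r22 = BaseRegister + 24 and the
+ // instruction addresses the object through r22.)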
+
+ // XXX: we use 'r22' as another hack+slash temporary register here :(
+ if (Offset <= 8191 && Offset >= -8192) { // smallish offset
+ // Fix up the old:
+ MI.getOperand(i).ChangeToRegister(IA64::r22, false);
+ //insert the new
+ BuildMI(MBB, II, dl, TII.get(IA64::ADDIMM22), IA64::r22)
+ .addReg(BaseRegister).addImm(Offset);
+ } else { // it's big
+ //fix up the old:
+ MI.getOperand(i).ChangeToRegister(IA64::r22, false);
+ BuildMI(MBB, II, dl, TII.get(IA64::MOVLIMM64), IA64::r22).addImm(Offset);
+ BuildMI(MBB, II, dl, TII.get(IA64::ADD), IA64::r22).addReg(BaseRegister)
+ .addReg(IA64::r22);
+ }
+
+}
+
+void IA64RegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool FP = hasFP(MF);
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+ // First, we handle the 'alloc' instruction, which should be right at the
+ // top of any function
+ static const unsigned RegsInOrder[96] = { // there are 96 GPRs the
+ // RSE worries about
+ IA64::r32, IA64::r33, IA64::r34, IA64::r35,
+ IA64::r36, IA64::r37, IA64::r38, IA64::r39, IA64::r40, IA64::r41,
+ IA64::r42, IA64::r43, IA64::r44, IA64::r45, IA64::r46, IA64::r47,
+ IA64::r48, IA64::r49, IA64::r50, IA64::r51, IA64::r52, IA64::r53,
+ IA64::r54, IA64::r55, IA64::r56, IA64::r57, IA64::r58, IA64::r59,
+ IA64::r60, IA64::r61, IA64::r62, IA64::r63, IA64::r64, IA64::r65,
+ IA64::r66, IA64::r67, IA64::r68, IA64::r69, IA64::r70, IA64::r71,
+ IA64::r72, IA64::r73, IA64::r74, IA64::r75, IA64::r76, IA64::r77,
+ IA64::r78, IA64::r79, IA64::r80, IA64::r81, IA64::r82, IA64::r83,
+ IA64::r84, IA64::r85, IA64::r86, IA64::r87, IA64::r88, IA64::r89,
+ IA64::r90, IA64::r91, IA64::r92, IA64::r93, IA64::r94, IA64::r95,
+ IA64::r96, IA64::r97, IA64::r98, IA64::r99, IA64::r100, IA64::r101,
+ IA64::r102, IA64::r103, IA64::r104, IA64::r105, IA64::r106, IA64::r107,
+ IA64::r108, IA64::r109, IA64::r110, IA64::r111, IA64::r112, IA64::r113,
+ IA64::r114, IA64::r115, IA64::r116, IA64::r117, IA64::r118, IA64::r119,
+ IA64::r120, IA64::r121, IA64::r122, IA64::r123, IA64::r124, IA64::r125,
+ IA64::r126, IA64::r127 };
+
+ unsigned numStackedGPRsUsed=0;
+ for (int i=0; i != 96; i++) {
+ if (MF.getRegInfo().isPhysRegUsed(RegsInOrder[i]))
+ numStackedGPRsUsed=i+1; // i+1, not ++: gaps count too (consider fn(fp, fp, int))
+ }
+
+ unsigned numOutRegsUsed=MF.getInfo<IA64FunctionInfo>()->outRegsUsed;
+
+ // XXX FIXME : this code should be a bit more reliable (in case there _isn't_
+ // a pseudo_alloc in the MBB)
+ unsigned dstRegOfPseudoAlloc;
+ for(MBBI = MBB.begin(); /*MBBI->getOpcode() != IA64::PSEUDO_ALLOC*/; ++MBBI) {
+ assert(MBBI != MBB.end());
+ if(MBBI->getOpcode() == IA64::PSEUDO_ALLOC) {
+ dstRegOfPseudoAlloc=MBBI->getOperand(0).getReg();
+ break;
+ }
+ }
+
+ if (MBBI != MBB.end()) dl = MBBI->getDebugLoc();
+
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::ALLOC)).
+ addReg(dstRegOfPseudoAlloc).addImm(0).
+ addImm(numStackedGPRsUsed).addImm(numOutRegsUsed).addImm(0);
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned NumBytes = MFI->getStackSize();
+
+ if(FP)
+ NumBytes += 8; // reserve space for the old FP
+
+ // Do we need to allocate space on the stack?
+ if (NumBytes == 0)
+ return;
+
+ // Add 16 bytes at the bottom of the stack (scratch area)
+ // and round the size to a multiple of the alignment.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Size = 16 + (FP ? 8 : 0);
+ NumBytes = (NumBytes+Size+Align-1)/Align*Align;
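+ // (e.g. a 40-byte frame with no FP and 16-byte alignment becomes
+ // (40+16+15)/16*16 = 64 bytes.)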
+
+ // Update frame info to pretend that this is part of the stack...
+ MFI->setStackSize(NumBytes);
+
+ // adjust stack pointer: r12 -= numbytes
+ if (NumBytes <= 8191) {
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::ADDIMM22),IA64::r12).addReg(IA64::r12).
+ addImm(-NumBytes);
+ } else { // we use r22 as a scratch register here
+ // first load the decrement into r22
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::MOVLIMM64), IA64::r22).
+ addImm(-NumBytes);
+ // FIXME: MOVLSI32 expects a _u_32imm
+ // then add (subtract) it to r12 (stack ptr)
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::ADD), IA64::r12)
+ .addReg(IA64::r12).addReg(IA64::r22);
+
+ }
+
+ // now if we need to, save the old FP and set the new
+ if (FP) {
+ BuildMI(MBB, MBBI,dl,TII.get(IA64::ST8)).addReg(IA64::r12).addReg(IA64::r5);
+ // this must be the last instr in the prolog ? (XXX: why??)
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::MOV), IA64::r5).addReg(IA64::r12);
+ }
+
+}
+
+void IA64RegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ assert(MBBI->getOpcode() == IA64::RET &&
+ "Can only insert epilog into returning blocks");
+ DebugLoc dl = MBBI->getDebugLoc();
+ bool FP = hasFP(MF);
+
+ // Get the number of bytes allocated from the FrameInfo...
+ unsigned NumBytes = MFI->getStackSize();
+
+ //now if we need to, restore the old FP
+ if (FP) {
+ //copy the FP into the SP (discards allocas)
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::MOV), IA64::r12).addReg(IA64::r5);
+ //restore the FP
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::LD8), IA64::r5).addReg(IA64::r5);
+ }
+
+ if (NumBytes != 0) {
+ if (NumBytes <= 8191) {
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::ADDIMM22),IA64::r12).
+ addReg(IA64::r12).addImm(NumBytes);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::MOVLIMM64), IA64::r22).
+ addImm(NumBytes);
+ BuildMI(MBB, MBBI, dl, TII.get(IA64::ADD), IA64::r12).addReg(IA64::r12).
+ addReg(IA64::r22);
+ }
+ }
+}
+
+unsigned IA64RegisterInfo::getRARegister() const {
+ assert(0 && "What is the return address register");
+ return 0;
+}
+
+unsigned IA64RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? IA64::r5 : IA64::r12;
+}
+
+unsigned IA64RegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned IA64RegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int IA64RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "What is the dwarf register number");
+ return -1;
+}
+
+#include "IA64GenRegisterInfo.inc"
+
diff --git a/lib/Target/IA64/IA64RegisterInfo.h b/lib/Target/IA64/IA64RegisterInfo.h
new file mode 100644
index 000000000000..0c5083e75c25
--- /dev/null
+++ b/lib/Target/IA64/IA64RegisterInfo.h
@@ -0,0 +1,63 @@
+//===- IA64RegisterInfo.h - IA64 Register Information Impl ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the IA64 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64REGISTERINFO_H
+#define IA64REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "IA64GenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+struct IA64RegisterInfo : public IA64GenRegisterInfo {
+ const TargetInstrInfo &TII;
+
+ IA64RegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/IA64/IA64RegisterInfo.td b/lib/Target/IA64/IA64RegisterInfo.td
new file mode 100644
index 000000000000..dd72dc3008a3
--- /dev/null
+++ b/lib/Target/IA64/IA64RegisterInfo.td
@@ -0,0 +1,509 @@
+//===- IA64RegisterInfo.td - Describe the IA64 Register File ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the IA64 register file, defining the registers
+// themselves, aliases between the registers, and the register classes built
+// out of the registers.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Register definitions...
+//
+
+class IA64Register<string n> : Register<n> {
+ let Namespace = "IA64";
+}
+
+// GR - One of the 128 64-bit general registers
+class GR<bits<7> num, string n> : IA64Register<n> {
+ field bits<7> Num = num;
+}
+
+// FP - One of 128 82-bit floating-point registers
+class FP<bits<7> num, string n> : IA64Register<n> {
+ field bits<7> Num = num;
+}
+
+// PR - One of 64 1-bit predicate registers
+class PR<bits<6> num, string n> : IA64Register<n> {
+ field bits<6> Num = num;
+}
+
+/* general registers */
+def r0 : GR< 0, "r0">, DwarfRegNum<[0]>;
+def r1 : GR< 1, "r1">, DwarfRegNum<[1]>;
+def r2 : GR< 2, "r2">, DwarfRegNum<[2]>;
+def r3 : GR< 3, "r3">, DwarfRegNum<[3]>;
+def r4 : GR< 4, "r4">, DwarfRegNum<[4]>;
+def r5 : GR< 5, "r5">, DwarfRegNum<[5]>;
+def r6 : GR< 6, "r6">, DwarfRegNum<[6]>;
+def r7 : GR< 7, "r7">, DwarfRegNum<[7]>;
+def r8 : GR< 8, "r8">, DwarfRegNum<[8]>;
+def r9 : GR< 9, "r9">, DwarfRegNum<[9]>;
+def r10 : GR< 10, "r10">, DwarfRegNum<[10]>;
+def r11 : GR< 11, "r11">, DwarfRegNum<[11]>;
+def r12 : GR< 12, "r12">, DwarfRegNum<[12]>;
+def r13 : GR< 13, "r13">, DwarfRegNum<[13]>;
+def r14 : GR< 14, "r14">, DwarfRegNum<[14]>;
+def r15 : GR< 15, "r15">, DwarfRegNum<[15]>;
+def r16 : GR< 16, "r16">, DwarfRegNum<[16]>;
+def r17 : GR< 17, "r17">, DwarfRegNum<[17]>;
+def r18 : GR< 18, "r18">, DwarfRegNum<[18]>;
+def r19 : GR< 19, "r19">, DwarfRegNum<[19]>;
+def r20 : GR< 20, "r20">, DwarfRegNum<[20]>;
+def r21 : GR< 21, "r21">, DwarfRegNum<[21]>;
+def r22 : GR< 22, "r22">, DwarfRegNum<[22]>;
+def r23 : GR< 23, "r23">, DwarfRegNum<[23]>;
+def r24 : GR< 24, "r24">, DwarfRegNum<[24]>;
+def r25 : GR< 25, "r25">, DwarfRegNum<[25]>;
+def r26 : GR< 26, "r26">, DwarfRegNum<[26]>;
+def r27 : GR< 27, "r27">, DwarfRegNum<[27]>;
+def r28 : GR< 28, "r28">, DwarfRegNum<[28]>;
+def r29 : GR< 29, "r29">, DwarfRegNum<[29]>;
+def r30 : GR< 30, "r30">, DwarfRegNum<[30]>;
+def r31 : GR< 31, "r31">, DwarfRegNum<[31]>;
+def r32 : GR< 32, "r32">, DwarfRegNum<[32]>;
+def r33 : GR< 33, "r33">, DwarfRegNum<[33]>;
+def r34 : GR< 34, "r34">, DwarfRegNum<[34]>;
+def r35 : GR< 35, "r35">, DwarfRegNum<[35]>;
+def r36 : GR< 36, "r36">, DwarfRegNum<[36]>;
+def r37 : GR< 37, "r37">, DwarfRegNum<[37]>;
+def r38 : GR< 38, "r38">, DwarfRegNum<[38]>;
+def r39 : GR< 39, "r39">, DwarfRegNum<[39]>;
+def r40 : GR< 40, "r40">, DwarfRegNum<[40]>;
+def r41 : GR< 41, "r41">, DwarfRegNum<[41]>;
+def r42 : GR< 42, "r42">, DwarfRegNum<[42]>;
+def r43 : GR< 43, "r43">, DwarfRegNum<[43]>;
+def r44 : GR< 44, "r44">, DwarfRegNum<[44]>;
+def r45 : GR< 45, "r45">, DwarfRegNum<[45]>;
+def r46 : GR< 46, "r46">, DwarfRegNum<[46]>;
+def r47 : GR< 47, "r47">, DwarfRegNum<[47]>;
+def r48 : GR< 48, "r48">, DwarfRegNum<[48]>;
+def r49 : GR< 49, "r49">, DwarfRegNum<[49]>;
+def r50 : GR< 50, "r50">, DwarfRegNum<[50]>;
+def r51 : GR< 51, "r51">, DwarfRegNum<[51]>;
+def r52 : GR< 52, "r52">, DwarfRegNum<[52]>;
+def r53 : GR< 53, "r53">, DwarfRegNum<[53]>;
+def r54 : GR< 54, "r54">, DwarfRegNum<[54]>;
+def r55 : GR< 55, "r55">, DwarfRegNum<[55]>;
+def r56 : GR< 56, "r56">, DwarfRegNum<[56]>;
+def r57 : GR< 57, "r57">, DwarfRegNum<[57]>;
+def r58 : GR< 58, "r58">, DwarfRegNum<[58]>;
+def r59 : GR< 59, "r59">, DwarfRegNum<[59]>;
+def r60 : GR< 60, "r60">, DwarfRegNum<[60]>;
+def r61 : GR< 61, "r61">, DwarfRegNum<[61]>;
+def r62 : GR< 62, "r62">, DwarfRegNum<[62]>;
+def r63 : GR< 63, "r63">, DwarfRegNum<[63]>;
+def r64 : GR< 64, "r64">, DwarfRegNum<[64]>;
+def r65 : GR< 65, "r65">, DwarfRegNum<[65]>;
+def r66 : GR< 66, "r66">, DwarfRegNum<[66]>;
+def r67 : GR< 67, "r67">, DwarfRegNum<[67]>;
+def r68 : GR< 68, "r68">, DwarfRegNum<[68]>;
+def r69 : GR< 69, "r69">, DwarfRegNum<[69]>;
+def r70 : GR< 70, "r70">, DwarfRegNum<[70]>;
+def r71 : GR< 71, "r71">, DwarfRegNum<[71]>;
+def r72 : GR< 72, "r72">, DwarfRegNum<[72]>;
+def r73 : GR< 73, "r73">, DwarfRegNum<[73]>;
+def r74 : GR< 74, "r74">, DwarfRegNum<[74]>;
+def r75 : GR< 75, "r75">, DwarfRegNum<[75]>;
+def r76 : GR< 76, "r76">, DwarfRegNum<[76]>;
+def r77 : GR< 77, "r77">, DwarfRegNum<[77]>;
+def r78 : GR< 78, "r78">, DwarfRegNum<[78]>;
+def r79 : GR< 79, "r79">, DwarfRegNum<[79]>;
+def r80 : GR< 80, "r80">, DwarfRegNum<[80]>;
+def r81 : GR< 81, "r81">, DwarfRegNum<[81]>;
+def r82 : GR< 82, "r82">, DwarfRegNum<[82]>;
+def r83 : GR< 83, "r83">, DwarfRegNum<[83]>;
+def r84 : GR< 84, "r84">, DwarfRegNum<[84]>;
+def r85 : GR< 85, "r85">, DwarfRegNum<[85]>;
+def r86 : GR< 86, "r86">, DwarfRegNum<[86]>;
+def r87 : GR< 87, "r87">, DwarfRegNum<[87]>;
+def r88 : GR< 88, "r88">, DwarfRegNum<[88]>;
+def r89 : GR< 89, "r89">, DwarfRegNum<[89]>;
+def r90 : GR< 90, "r90">, DwarfRegNum<[90]>;
+def r91 : GR< 91, "r91">, DwarfRegNum<[91]>;
+def r92 : GR< 92, "r92">, DwarfRegNum<[92]>;
+def r93 : GR< 93, "r93">, DwarfRegNum<[93]>;
+def r94 : GR< 94, "r94">, DwarfRegNum<[94]>;
+def r95 : GR< 95, "r95">, DwarfRegNum<[95]>;
+def r96 : GR< 96, "r96">, DwarfRegNum<[96]>;
+def r97 : GR< 97, "r97">, DwarfRegNum<[97]>;
+def r98 : GR< 98, "r98">, DwarfRegNum<[98]>;
+def r99 : GR< 99, "r99">, DwarfRegNum<[99]>;
+def r100 : GR< 100, "r100">, DwarfRegNum<[100]>;
+def r101 : GR< 101, "r101">, DwarfRegNum<[101]>;
+def r102 : GR< 102, "r102">, DwarfRegNum<[102]>;
+def r103 : GR< 103, "r103">, DwarfRegNum<[103]>;
+def r104 : GR< 104, "r104">, DwarfRegNum<[104]>;
+def r105 : GR< 105, "r105">, DwarfRegNum<[105]>;
+def r106 : GR< 106, "r106">, DwarfRegNum<[106]>;
+def r107 : GR< 107, "r107">, DwarfRegNum<[107]>;
+def r108 : GR< 108, "r108">, DwarfRegNum<[108]>;
+def r109 : GR< 109, "r109">, DwarfRegNum<[109]>;
+def r110 : GR< 110, "r110">, DwarfRegNum<[110]>;
+def r111 : GR< 111, "r111">, DwarfRegNum<[111]>;
+def r112 : GR< 112, "r112">, DwarfRegNum<[112]>;
+def r113 : GR< 113, "r113">, DwarfRegNum<[113]>;
+def r114 : GR< 114, "r114">, DwarfRegNum<[114]>;
+def r115 : GR< 115, "r115">, DwarfRegNum<[115]>;
+def r116 : GR< 116, "r116">, DwarfRegNum<[116]>;
+def r117 : GR< 117, "r117">, DwarfRegNum<[117]>;
+def r118 : GR< 118, "r118">, DwarfRegNum<[118]>;
+def r119 : GR< 119, "r119">, DwarfRegNum<[119]>;
+def r120 : GR< 120, "r120">, DwarfRegNum<[120]>;
+def r121 : GR< 121, "r121">, DwarfRegNum<[121]>;
+def r122 : GR< 122, "r122">, DwarfRegNum<[122]>;
+def r123 : GR< 123, "r123">, DwarfRegNum<[123]>;
+def r124 : GR< 124, "r124">, DwarfRegNum<[124]>;
+def r125 : GR< 125, "r125">, DwarfRegNum<[125]>;
+def r126 : GR< 126, "r126">, DwarfRegNum<[126]>;
+def r127 : GR< 127, "r127">, DwarfRegNum<[127]>;
+
+/* floating-point registers */
+def F0 : FP< 0, "f0">, DwarfRegNum<[128]>;
+def F1 : FP< 1, "f1">, DwarfRegNum<[129]>;
+def F2 : FP< 2, "f2">, DwarfRegNum<[130]>;
+def F3 : FP< 3, "f3">, DwarfRegNum<[131]>;
+def F4 : FP< 4, "f4">, DwarfRegNum<[132]>;
+def F5 : FP< 5, "f5">, DwarfRegNum<[133]>;
+def F6 : FP< 6, "f6">, DwarfRegNum<[134]>;
+def F7 : FP< 7, "f7">, DwarfRegNum<[135]>;
+def F8 : FP< 8, "f8">, DwarfRegNum<[136]>;
+def F9 : FP< 9, "f9">, DwarfRegNum<[137]>;
+def F10 : FP< 10, "f10">, DwarfRegNum<[138]>;
+def F11 : FP< 11, "f11">, DwarfRegNum<[139]>;
+def F12 : FP< 12, "f12">, DwarfRegNum<[140]>;
+def F13 : FP< 13, "f13">, DwarfRegNum<[141]>;
+def F14 : FP< 14, "f14">, DwarfRegNum<[142]>;
+def F15 : FP< 15, "f15">, DwarfRegNum<[143]>;
+def F16 : FP< 16, "f16">, DwarfRegNum<[144]>;
+def F17 : FP< 17, "f17">, DwarfRegNum<[145]>;
+def F18 : FP< 18, "f18">, DwarfRegNum<[146]>;
+def F19 : FP< 19, "f19">, DwarfRegNum<[147]>;
+def F20 : FP< 20, "f20">, DwarfRegNum<[148]>;
+def F21 : FP< 21, "f21">, DwarfRegNum<[149]>;
+def F22 : FP< 22, "f22">, DwarfRegNum<[150]>;
+def F23 : FP< 23, "f23">, DwarfRegNum<[151]>;
+def F24 : FP< 24, "f24">, DwarfRegNum<[152]>;
+def F25 : FP< 25, "f25">, DwarfRegNum<[153]>;
+def F26 : FP< 26, "f26">, DwarfRegNum<[154]>;
+def F27 : FP< 27, "f27">, DwarfRegNum<[155]>;
+def F28 : FP< 28, "f28">, DwarfRegNum<[156]>;
+def F29 : FP< 29, "f29">, DwarfRegNum<[157]>;
+def F30 : FP< 30, "f30">, DwarfRegNum<[158]>;
+def F31 : FP< 31, "f31">, DwarfRegNum<[159]>;
+def F32 : FP< 32, "f32">, DwarfRegNum<[160]>;
+def F33 : FP< 33, "f33">, DwarfRegNum<[161]>;
+def F34 : FP< 34, "f34">, DwarfRegNum<[162]>;
+def F35 : FP< 35, "f35">, DwarfRegNum<[163]>;
+def F36 : FP< 36, "f36">, DwarfRegNum<[164]>;
+def F37 : FP< 37, "f37">, DwarfRegNum<[165]>;
+def F38 : FP< 38, "f38">, DwarfRegNum<[166]>;
+def F39 : FP< 39, "f39">, DwarfRegNum<[167]>;
+def F40 : FP< 40, "f40">, DwarfRegNum<[168]>;
+def F41 : FP< 41, "f41">, DwarfRegNum<[169]>;
+def F42 : FP< 42, "f42">, DwarfRegNum<[170]>;
+def F43 : FP< 43, "f43">, DwarfRegNum<[171]>;
+def F44 : FP< 44, "f44">, DwarfRegNum<[172]>;
+def F45 : FP< 45, "f45">, DwarfRegNum<[173]>;
+def F46 : FP< 46, "f46">, DwarfRegNum<[174]>;
+def F47 : FP< 47, "f47">, DwarfRegNum<[175]>;
+def F48 : FP< 48, "f48">, DwarfRegNum<[176]>;
+def F49 : FP< 49, "f49">, DwarfRegNum<[177]>;
+def F50 : FP< 50, "f50">, DwarfRegNum<[178]>;
+def F51 : FP< 51, "f51">, DwarfRegNum<[179]>;
+def F52 : FP< 52, "f52">, DwarfRegNum<[180]>;
+def F53 : FP< 53, "f53">, DwarfRegNum<[181]>;
+def F54 : FP< 54, "f54">, DwarfRegNum<[182]>;
+def F55 : FP< 55, "f55">, DwarfRegNum<[183]>;
+def F56 : FP< 56, "f56">, DwarfRegNum<[184]>;
+def F57 : FP< 57, "f57">, DwarfRegNum<[185]>;
+def F58 : FP< 58, "f58">, DwarfRegNum<[186]>;
+def F59 : FP< 59, "f59">, DwarfRegNum<[187]>;
+def F60 : FP< 60, "f60">, DwarfRegNum<[188]>;
+def F61 : FP< 61, "f61">, DwarfRegNum<[189]>;
+def F62 : FP< 62, "f62">, DwarfRegNum<[190]>;
+def F63 : FP< 63, "f63">, DwarfRegNum<[191]>;
+def F64 : FP< 64, "f64">, DwarfRegNum<[192]>;
+def F65 : FP< 65, "f65">, DwarfRegNum<[193]>;
+def F66 : FP< 66, "f66">, DwarfRegNum<[194]>;
+def F67 : FP< 67, "f67">, DwarfRegNum<[195]>;
+def F68 : FP< 68, "f68">, DwarfRegNum<[196]>;
+def F69 : FP< 69, "f69">, DwarfRegNum<[197]>;
+def F70 : FP< 70, "f70">, DwarfRegNum<[198]>;
+def F71 : FP< 71, "f71">, DwarfRegNum<[199]>;
+def F72 : FP< 72, "f72">, DwarfRegNum<[200]>;
+def F73 : FP< 73, "f73">, DwarfRegNum<[201]>;
+def F74 : FP< 74, "f74">, DwarfRegNum<[202]>;
+def F75 : FP< 75, "f75">, DwarfRegNum<[203]>;
+def F76 : FP< 76, "f76">, DwarfRegNum<[204]>;
+def F77 : FP< 77, "f77">, DwarfRegNum<[205]>;
+def F78 : FP< 78, "f78">, DwarfRegNum<[206]>;
+def F79 : FP< 79, "f79">, DwarfRegNum<[207]>;
+def F80 : FP< 80, "f80">, DwarfRegNum<[208]>;
+def F81 : FP< 81, "f81">, DwarfRegNum<[209]>;
+def F82 : FP< 82, "f82">, DwarfRegNum<[210]>;
+def F83 : FP< 83, "f83">, DwarfRegNum<[211]>;
+def F84 : FP< 84, "f84">, DwarfRegNum<[212]>;
+def F85 : FP< 85, "f85">, DwarfRegNum<[213]>;
+def F86 : FP< 86, "f86">, DwarfRegNum<[214]>;
+def F87 : FP< 87, "f87">, DwarfRegNum<[215]>;
+def F88 : FP< 88, "f88">, DwarfRegNum<[216]>;
+def F89 : FP< 89, "f89">, DwarfRegNum<[217]>;
+def F90 : FP< 90, "f90">, DwarfRegNum<[218]>;
+def F91 : FP< 91, "f91">, DwarfRegNum<[219]>;
+def F92 : FP< 92, "f92">, DwarfRegNum<[220]>;
+def F93 : FP< 93, "f93">, DwarfRegNum<[221]>;
+def F94 : FP< 94, "f94">, DwarfRegNum<[222]>;
+def F95 : FP< 95, "f95">, DwarfRegNum<[223]>;
+def F96 : FP< 96, "f96">, DwarfRegNum<[224]>;
+def F97 : FP< 97, "f97">, DwarfRegNum<[225]>;
+def F98 : FP< 98, "f98">, DwarfRegNum<[226]>;
+def F99 : FP< 99, "f99">, DwarfRegNum<[227]>;
+def F100 : FP< 100, "f100">, DwarfRegNum<[228]>;
+def F101 : FP< 101, "f101">, DwarfRegNum<[229]>;
+def F102 : FP< 102, "f102">, DwarfRegNum<[230]>;
+def F103 : FP< 103, "f103">, DwarfRegNum<[231]>;
+def F104 : FP< 104, "f104">, DwarfRegNum<[232]>;
+def F105 : FP< 105, "f105">, DwarfRegNum<[233]>;
+def F106 : FP< 106, "f106">, DwarfRegNum<[234]>;
+def F107 : FP< 107, "f107">, DwarfRegNum<[235]>;
+def F108 : FP< 108, "f108">, DwarfRegNum<[236]>;
+def F109 : FP< 109, "f109">, DwarfRegNum<[237]>;
+def F110 : FP< 110, "f110">, DwarfRegNum<[238]>;
+def F111 : FP< 111, "f111">, DwarfRegNum<[239]>;
+def F112 : FP< 112, "f112">, DwarfRegNum<[240]>;
+def F113 : FP< 113, "f113">, DwarfRegNum<[241]>;
+def F114 : FP< 114, "f114">, DwarfRegNum<[242]>;
+def F115 : FP< 115, "f115">, DwarfRegNum<[243]>;
+def F116 : FP< 116, "f116">, DwarfRegNum<[244]>;
+def F117 : FP< 117, "f117">, DwarfRegNum<[245]>;
+def F118 : FP< 118, "f118">, DwarfRegNum<[246]>;
+def F119 : FP< 119, "f119">, DwarfRegNum<[247]>;
+def F120 : FP< 120, "f120">, DwarfRegNum<[248]>;
+def F121 : FP< 121, "f121">, DwarfRegNum<[249]>;
+def F122 : FP< 122, "f122">, DwarfRegNum<[250]>;
+def F123 : FP< 123, "f123">, DwarfRegNum<[251]>;
+def F124 : FP< 124, "f124">, DwarfRegNum<[252]>;
+def F125 : FP< 125, "f125">, DwarfRegNum<[253]>;
+def F126 : FP< 126, "f126">, DwarfRegNum<[254]>;
+def F127 : FP< 127, "f127">, DwarfRegNum<[255]>;
+
+/* predicate registers */
+def p0 : PR< 0, "p0">, DwarfRegNum<[256]>;
+def p1 : PR< 1, "p1">, DwarfRegNum<[257]>;
+def p2 : PR< 2, "p2">, DwarfRegNum<[258]>;
+def p3 : PR< 3, "p3">, DwarfRegNum<[259]>;
+def p4 : PR< 4, "p4">, DwarfRegNum<[260]>;
+def p5 : PR< 5, "p5">, DwarfRegNum<[261]>;
+def p6 : PR< 6, "p6">, DwarfRegNum<[262]>;
+def p7 : PR< 7, "p7">, DwarfRegNum<[263]>;
+def p8 : PR< 8, "p8">, DwarfRegNum<[264]>;
+def p9 : PR< 9, "p9">, DwarfRegNum<[265]>;
+def p10 : PR< 10, "p10">, DwarfRegNum<[266]>;
+def p11 : PR< 11, "p11">, DwarfRegNum<[267]>;
+def p12 : PR< 12, "p12">, DwarfRegNum<[268]>;
+def p13 : PR< 13, "p13">, DwarfRegNum<[269]>;
+def p14 : PR< 14, "p14">, DwarfRegNum<[270]>;
+def p15 : PR< 15, "p15">, DwarfRegNum<[271]>;
+def p16 : PR< 16, "p16">, DwarfRegNum<[272]>;
+def p17 : PR< 17, "p17">, DwarfRegNum<[273]>;
+def p18 : PR< 18, "p18">, DwarfRegNum<[274]>;
+def p19 : PR< 19, "p19">, DwarfRegNum<[275]>;
+def p20 : PR< 20, "p20">, DwarfRegNum<[276]>;
+def p21 : PR< 21, "p21">, DwarfRegNum<[277]>;
+def p22 : PR< 22, "p22">, DwarfRegNum<[278]>;
+def p23 : PR< 23, "p23">, DwarfRegNum<[279]>;
+def p24 : PR< 24, "p24">, DwarfRegNum<[280]>;
+def p25 : PR< 25, "p25">, DwarfRegNum<[281]>;
+def p26 : PR< 26, "p26">, DwarfRegNum<[282]>;
+def p27 : PR< 27, "p27">, DwarfRegNum<[283]>;
+def p28 : PR< 28, "p28">, DwarfRegNum<[284]>;
+def p29 : PR< 29, "p29">, DwarfRegNum<[285]>;
+def p30 : PR< 30, "p30">, DwarfRegNum<[286]>;
+def p31 : PR< 31, "p31">, DwarfRegNum<[287]>;
+def p32 : PR< 32, "p32">, DwarfRegNum<[288]>;
+def p33 : PR< 33, "p33">, DwarfRegNum<[289]>;
+def p34 : PR< 34, "p34">, DwarfRegNum<[290]>;
+def p35 : PR< 35, "p35">, DwarfRegNum<[291]>;
+def p36 : PR< 36, "p36">, DwarfRegNum<[292]>;
+def p37 : PR< 37, "p37">, DwarfRegNum<[293]>;
+def p38 : PR< 38, "p38">, DwarfRegNum<[294]>;
+def p39 : PR< 39, "p39">, DwarfRegNum<[295]>;
+def p40 : PR< 40, "p40">, DwarfRegNum<[296]>;
+def p41 : PR< 41, "p41">, DwarfRegNum<[297]>;
+def p42 : PR< 42, "p42">, DwarfRegNum<[298]>;
+def p43 : PR< 43, "p43">, DwarfRegNum<[299]>;
+def p44 : PR< 44, "p44">, DwarfRegNum<[300]>;
+def p45 : PR< 45, "p45">, DwarfRegNum<[301]>;
+def p46 : PR< 46, "p46">, DwarfRegNum<[302]>;
+def p47 : PR< 47, "p47">, DwarfRegNum<[303]>;
+def p48 : PR< 48, "p48">, DwarfRegNum<[304]>;
+def p49 : PR< 49, "p49">, DwarfRegNum<[305]>;
+def p50 : PR< 50, "p50">, DwarfRegNum<[306]>;
+def p51 : PR< 51, "p51">, DwarfRegNum<[307]>;
+def p52 : PR< 52, "p52">, DwarfRegNum<[308]>;
+def p53 : PR< 53, "p53">, DwarfRegNum<[309]>;
+def p54 : PR< 54, "p54">, DwarfRegNum<[310]>;
+def p55 : PR< 55, "p55">, DwarfRegNum<[311]>;
+def p56 : PR< 56, "p56">, DwarfRegNum<[312]>;
+def p57 : PR< 57, "p57">, DwarfRegNum<[313]>;
+def p58 : PR< 58, "p58">, DwarfRegNum<[314]>;
+def p59 : PR< 59, "p59">, DwarfRegNum<[315]>;
+def p60 : PR< 60, "p60">, DwarfRegNum<[316]>;
+def p61 : PR< 61, "p61">, DwarfRegNum<[317]>;
+def p62 : PR< 62, "p62">, DwarfRegNum<[318]>;
+def p63 : PR< 63, "p63">, DwarfRegNum<[319]>;
+
+// XXX : this is temporary, we'll eventually have the output registers
+// in the general purpose register class too?
+def out0 : GR<0, "out0">, DwarfRegNum<[120]>;
+def out1 : GR<1, "out1">, DwarfRegNum<[121]>;
+def out2 : GR<2, "out2">, DwarfRegNum<[122]>;
+def out3 : GR<3, "out3">, DwarfRegNum<[123]>;
+def out4 : GR<4, "out4">, DwarfRegNum<[124]>;
+def out5 : GR<5, "out5">, DwarfRegNum<[125]>;
+def out6 : GR<6, "out6">, DwarfRegNum<[126]>;
+def out7 : GR<7, "out7">, DwarfRegNum<[127]>;
+
+// application (special) registers:
+
+// "previous function state" application register
+def AR_PFS : GR<0, "ar.pfs">, DwarfRegNum<[331]>;
+
+// "return pointer" (this is really branch register b0)
+def rp : GR<0, "rp">, DwarfRegNum<[-1]>;
+
+// branch reg 6
+def B6 : GR<0, "b6">, DwarfRegNum<[326]>;
+
+//===----------------------------------------------------------------------===//
+// Register Class Definitions... now that we have all of the pieces, define the
+// top-level register classes. The order specified in the register list is
+// implicitly defined to be the register allocation order.
+//
+
+// these are the scratch (+stacked) general registers
+// FIXME/XXX we also reserve a frame pointer (r5)
+// FIXME/XXX we also reserve r2 for spilling/filling predicates
+// in IA64RegisterInfo.cpp
+// FIXME/XXX we also reserve r22 for calculating addresses
+// in IA64RegisterInfo.cpp
+
+def GR : RegisterClass<"IA64", [i64], 64,
+ [
+
+//FIXME!: for both readability and performance, we don't want the out
+// registers to be the first ones allocated
+
+ out7, out6, out5, out4, out3, out2, out1, out0,
+ r3, r8, r9, r10, r11, r14, r15,
+ r16, r17, r18, r19, r20, r21, r23,
+ r24, r25, r26, r27, r28, r29, r30, r31,
+ r32, r33, r34, r35, r36, r37, r38, r39,
+ r40, r41, r42, r43, r44, r45, r46, r47,
+ r48, r49, r50, r51, r52, r53, r54, r55,
+ r56, r57, r58, r59, r60, r61, r62, r63,
+ r64, r65, r66, r67, r68, r69, r70, r71,
+ r72, r73, r74, r75, r76, r77, r78, r79,
+ r80, r81, r82, r83, r84, r85, r86, r87,
+ r88, r89, r90, r91, r92, r93, r94, r95,
+ r96, r97, r98, r99, r100, r101, r102, r103,
+ r104, r105, r106, r107, r108, r109, r110, r111,
+ r112, r113, r114, r115, r116, r117, r118, r119,
+ // last 17 are special (look down)
+ r120, r121, r122, r123, r124, r125, r126, r127,
+ r0, r1, r2, r5, r12, r13, r22, rp, AR_PFS]>
+ {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GRClass::iterator
+ GRClass::allocation_order_begin(const MachineFunction &MF) const {
+ // hide the 8 out? registers appropriately:
+ return begin()+(8-(MF.getInfo<IA64FunctionInfo>()->outRegsUsed));
+ }
+
+ GRClass::iterator
+ GRClass::allocation_order_end(const MachineFunction &MF) const {
+ // the 9 special registers r0,r1,r2,r5,r12,r13 etc
+ int numReservedRegs=9;
+
+ // we also can't allocate registers for use as locals if they're already
+ // required as 'out' registers
+ numReservedRegs+=MF.getInfo<IA64FunctionInfo>()->outRegsUsed;
+ return end()-numReservedRegs; // hide registers appropriately
+ }
+ }];
+}
+
+
+// these are the scratch (+stacked) FP registers
+
+def FP : RegisterClass<"IA64", [f64], 64,
+ [F6, F7,
+ F8, F9, F10, F11, F12, F13, F14, F15,
+ F32, F33, F34, F35, F36, F37, F38, F39,
+ F40, F41, F42, F43, F44, F45, F46, F47,
+ F48, F49, F50, F51, F52, F53, F54, F55,
+ F56, F57, F58, F59, F60, F61, F62, F63,
+ F64, F65, F66, F67, F68, F69, F70, F71,
+ F72, F73, F74, F75, F76, F77, F78, F79,
+ F80, F81, F82, F83, F84, F85, F86, F87,
+ F88, F89, F90, F91, F92, F93, F94, F95,
+ F96, F97, F98, F99, F100, F101, F102, F103,
+ F104, F105, F106, F107, F108, F109, F110, F111,
+ F112, F113, F114, F115, F116, F117, F118, F119,
+ F120, F121, F122, F123, F124, F125, F126, F127,
+ F0, F1]> // these last two are hidden
+ {
+// the 128s here are to make stf.spill/ldf.fill happy,
+// when storing full (82-bit) FP regs to stack slots
+// we need to 16-byte align
+ let Size=128;
+ let Alignment=128;
+
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ FPClass::iterator
+ FPClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin(); // we don't hide any FP regs from the start
+ }
+
+ FPClass::iterator
+ FPClass::allocation_order_end(const MachineFunction &MF) const {
+ return end()-2; // we hide regs F0, F1 from the end
+ }
+ }];
+}
+
+// these are the predicate registers, p0 (1/TRUE) is not here
+def PR : RegisterClass<"IA64", [i1], 64,
+
+// for now, let's be wimps and only have the scratch predicate regs
+ [p6, p7, p8, p9, p10, p11, p12, p13, p14, p15]> {
+ let Size = 64;
+ }
+
+/*
+ [p1, p2, p3, p4, p5, p6, p7,
+ p8, p9, p10, p11, p12, p13, p14, p15,
+ p16, p17, p18, p19, p20, p21, p22, p23,
+ p24, p25, p26, p27, p28, p29, p30, p31,
+ p32, p33, p34, p35, p36, p37, p38, p39,
+ p40, p41, p42, p43, p44, p45, p46, p47,
+ p48, p49, p50, p51, p52, p53, p54, p55,
+ p56, p57, p58, p59, p60, p61, p62, p63]>;
+ */
diff --git a/lib/Target/IA64/IA64Subtarget.cpp b/lib/Target/IA64/IA64Subtarget.cpp
new file mode 100644
index 000000000000..4eca50bdd22b
--- /dev/null
+++ b/lib/Target/IA64/IA64Subtarget.cpp
@@ -0,0 +1,18 @@
+//===-- IA64Subtarget.cpp - IA64 Subtarget Information ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IA64 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "subtarget"
+#include "IA64Subtarget.h"
+using namespace llvm;
+
+IA64Subtarget::IA64Subtarget() {}
diff --git a/lib/Target/IA64/IA64Subtarget.h b/lib/Target/IA64/IA64Subtarget.h
new file mode 100644
index 000000000000..0387af55119a
--- /dev/null
+++ b/lib/Target/IA64/IA64Subtarget.h
@@ -0,0 +1,28 @@
+//====---- IA64Subtarget.h - Define Subtarget for the IA64 -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the IA64 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64SUBTARGET_H
+#define IA64SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+
+namespace llvm {
+
+class IA64Subtarget : public TargetSubtarget {
+public:
+ IA64Subtarget();
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/IA64/IA64TargetAsmInfo.cpp b/lib/Target/IA64/IA64TargetAsmInfo.cpp
new file mode 100644
index 000000000000..2ae8beb9148e
--- /dev/null
+++ b/lib/Target/IA64/IA64TargetAsmInfo.cpp
@@ -0,0 +1,44 @@
+//===-- IA64TargetAsmInfo.cpp - IA64 asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the IA64TargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64TargetAsmInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+IA64TargetAsmInfo::IA64TargetAsmInfo(const TargetMachine &TM):
+ ELFTargetAsmInfo(TM) {
+ CommentString = "//";
+ Data8bitsDirective = "\tdata1\t"; // FIXME: check that we are
+ Data16bitsDirective = "\tdata2.ua\t"; // disabling auto-alignment
+ Data32bitsDirective = "\tdata4.ua\t"; // properly
+ Data64bitsDirective = "\tdata8.ua\t";
+ ZeroDirective = "\t.skip\t";
+ AsciiDirective = "\tstring\t";
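+ // (For illustration, with these directives a 32-bit global initializer of
+ // 1234 is emitted roughly as "data4.ua 1234", i.e. as unaligned data.)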
+
+ GlobalVarAddrPrefix="";
+ GlobalVarAddrSuffix="";
+ FunctionAddrPrefix="@fptr(";
+ FunctionAddrSuffix=")";
+
+ // FIXME: would be nice to have rodata (no 'w') when appropriate?
+ ConstantPoolSection = "\n\t.section .data, \"aw\", \"progbits\"\n";
+}
+
+unsigned IA64TargetAsmInfo::RelocBehaviour() const {
+ return (TM.getRelocationModel() != Reloc::Static ?
+ Reloc::LocalOrGlobal : Reloc::Global);
+}
+
+// FIXME: Support small data/bss/rodata sections someday.
diff --git a/lib/Target/IA64/IA64TargetAsmInfo.h b/lib/Target/IA64/IA64TargetAsmInfo.h
new file mode 100644
index 000000000000..130822e887f4
--- /dev/null
+++ b/lib/Target/IA64/IA64TargetAsmInfo.h
@@ -0,0 +1,33 @@
+//=====-- IA64TargetAsmInfo.h - IA64 asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the IA64TargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IA64TARGETASMINFO_H
+#define IA64TARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class TargetMachine;
+
+ struct IA64TargetAsmInfo : public ELFTargetAsmInfo {
+ explicit IA64TargetAsmInfo(const TargetMachine &TM);
+ virtual unsigned RelocBehaviour() const;
+ };
+
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp
new file mode 100644
index 000000000000..878a00a44518
--- /dev/null
+++ b/lib/Target/IA64/IA64TargetMachine.cpp
@@ -0,0 +1,94 @@
+//===-- IA64TargetMachine.cpp - Define TargetMachine for IA64 -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IA64 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "IA64TargetAsmInfo.h"
+#include "IA64TargetMachine.h"
+#include "IA64.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+/// IA64TargetMachineModule - Note that this is used on hosts that cannot link
+/// in a library unless there are references into the library. In particular,
+/// it seems that it is not possible to get things to work on Win32 without
+/// this. Though it is unused, do not remove it.
+extern "C" int IA64TargetMachineModule;
+int IA64TargetMachineModule = 0;
+
+static RegisterTarget<IA64TargetMachine> X("ia64",
+ "IA-64 (Itanium) [experimental]");
+
+const TargetAsmInfo *IA64TargetMachine::createTargetAsmInfo() const {
+ return new IA64TargetAsmInfo(*this);
+}
+
+unsigned IA64TargetMachine::getModuleMatchQuality(const Module &M) {
+ // we match [iI][aA]*64
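+ // (e.g. "ia64-unknown-linux-gnu" and "IA-64" both match; an empty triple
+ // falls through to the host check below.)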
+ bool seenIA64=false;
+ std::string TT = M.getTargetTriple();
+
+ if (TT.size() >= 4) {
+ if( (TT[0]=='i' || TT[0]=='I') &&
+ (TT[1]=='a' || TT[1]=='A') ) {
+ for(unsigned int i=2; i<(TT.size()-1); i++)
+ if(TT[i]=='6' && TT[i+1]=='4')
+ seenIA64=true;
+ }
+
+ if (seenIA64)
+ return 20; // strong match
+ }
+ // If the target triple is something non-ia64, we don't match.
+ if (!TT.empty()) return 0;
+
+#if defined(__ia64__) || defined(__IA64__)
+ return 5;
+#else
+ return 0;
+#endif
+}
+
+/// IA64TargetMachine ctor - Create an LP64 architecture model
+///
+IA64TargetMachine::IA64TargetMachine(const Module &M, const std::string &FS)
+ : DataLayout("e-f80:128:128"),
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
+ TLInfo(*this) { // FIXME? check this stuff
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool IA64TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel){
+ PM.add(createIA64DAGToDAGInstructionSelector(*this));
+ return false;
+}
+
+bool IA64TargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Make sure everything is bundled happily
+ PM.add(createIA64BundlingPass(*this));
+ return true;
+}
+bool IA64TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ PM.add(createIA64CodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
+
diff --git a/lib/Target/IA64/IA64TargetMachine.h b/lib/Target/IA64/IA64TargetMachine.h
new file mode 100644
index 000000000000..29d625ce673a
--- /dev/null
+++ b/lib/Target/IA64/IA64TargetMachine.h
@@ -0,0 +1,64 @@
+//===-- IA64TargetMachine.h - Define TargetMachine for IA64 ---*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the IA64 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IA64TARGETMACHINE_H
+#define LLVM_TARGET_IA64TARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "IA64InstrInfo.h"
+#include "IA64ISelLowering.h"
+#include "IA64Subtarget.h"
+
+namespace llvm {
+
+class IA64TargetMachine : public LLVMTargetMachine {
+ IA64Subtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ IA64InstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+ //IA64JITInfo JITInfo;
+ IA64TargetLowering TLInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ IA64TargetMachine(const Module &M, const std::string &FS);
+
+ virtual const IA64InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const IA64Subtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual IA64TargetLowering *getTargetLowering() const {
+ return const_cast<IA64TargetLowering*>(&TLInfo);
+ }
+ virtual const IA64RegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+};
+} // End llvm namespace
+
+#endif
+
+
diff --git a/lib/Target/IA64/Makefile b/lib/Target/IA64/Makefile
new file mode 100644
index 000000000000..d38325422c60
--- /dev/null
+++ b/lib/Target/IA64/Makefile
@@ -0,0 +1,20 @@
+##===- lib/Target/IA64/Makefile -----------------------------*- Makefile -*-===##
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMIA64CodeGen
+TARGET = IA64
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = IA64GenRegisterInfo.h.inc IA64GenRegisterNames.inc \
+ IA64GenRegisterInfo.inc IA64GenInstrNames.inc \
+ IA64GenInstrInfo.inc IA64GenAsmWriter.inc \
+ IA64GenDAGISel.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/IA64/README b/lib/Target/IA64/README
new file mode 100644
index 000000000000..60761ac11d97
--- /dev/null
+++ b/lib/Target/IA64/README
@@ -0,0 +1,48 @@
+TODO:
+ - Un-bitrot ISel
+ - Hook up If-Conversion a la ARM target
+ - Hook up all branch analysis functions
+ - Instruction scheduling
+ - Bundling
+ - Dynamic Optimization
+ - Testing and bugfixing
+ - stop passing FP args in both FP *and* integer regs when not required
+ - allocate low (nonstacked) registers more aggressively
+ - clean up and thoroughly test the isel patterns.
+ - fix stacked register allocation order: (for readability) we don't want
+ the out? registers being the first ones used
+ - fix up floating point
+ (nb http://gcc.gnu.org/wiki?pagename=ia64%20floating%20point )
+ - bundling!
+ (we will avoid the mess that is:
+ http://gcc.gnu.org/ml/gcc/2003-12/msg00832.html )
+ - instruction scheduling (hmmmm! ;)
+ - counted loop support
+ - make integer + FP mul/div more clever (we have fixed pseudocode atm)
+ - track and use comparison complements
+
+INFO:
+ - we are strictly LP64 here, no support for ILP32 on HP-UX. Linux users
+ don't need to worry about this.
+ - I have instruction scheduling/bundling pseudocode that really works
+ (it has been tested, albeit at the perl-script level),
+ so before you go write your own, send me an email!
+
+KNOWN DEFECTS AT THE CURRENT TIME:
+ - C++ vtables contain naked function pointers, not function descriptors,
+ which is bad. see http://llvm.cs.uiuc.edu/bugs/show_bug.cgi?id=406
+ - varargs are broken
+ - alloca doesn't work (indeed, stack frame layout is bogus)
+ - no support for big-endian environments
+ - (not really the backend, but...) the CFE has some issues on IA64.
+ these will probably be fixed soon.
+
+ACKNOWLEDGEMENTS:
+ - Chris Lattner (x100)
+ - Other LLVM developers ("hey, that looks familiar")
+
+CONTACT:
+ - You can email me at duraid@octopus.com.au. If you find a small bug,
+ just email me. If you find a big bug, please file a bug report
+ in bugzilla! http://llvm.cs.uiuc.edu is your one stop shop for all
+ things LLVM.
diff --git a/lib/Target/MSIL/CMakeLists.txt b/lib/Target/MSIL/CMakeLists.txt
new file mode 100644
index 000000000000..b1d47ef05ec5
--- /dev/null
+++ b/lib/Target/MSIL/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_target(MSIL
+ MSILWriter.cpp
+ )
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
new file mode 100644
index 000000000000..ada851d4f226
--- /dev/null
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -0,0 +1,1680 @@
+//===-- MSILWriter.cpp - Library for converting LLVM code to MSIL ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library converts LLVM code to MSIL code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSILWriter.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Analysis/ConstantsScanner.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/Passes.h"
+
+namespace {
+ // TargetMachine for the MSIL
+ struct VISIBILITY_HIDDEN MSILTarget : public TargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+
+ MSILTarget(const Module &M, const std::string &FS)
+ : DataLayout(&M) {}
+
+ virtual bool WantsWholeFile() const { return true; }
+ virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel);
+
+ // This class always works, but shouldn't be the default in most cases.
+ static unsigned getModuleMatchQuality(const Module &M) { return 1; }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ };
+}
+
+/// MSILTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int MSILTargetMachineModule;
+int MSILTargetMachineModule = 0;
+
+static RegisterTarget<MSILTarget> X("msil", "MSIL backend");
+
+bool MSILModule::runOnModule(Module &M) {
+ ModulePtr = &M;
+ TD = &getAnalysis<TargetData>();
+ bool Changed = false;
+ // Find named types.
+ TypeSymbolTable& Table = M.getTypeSymbolTable();
+ std::set<const Type *> Types = getAnalysis<FindUsedTypes>().getTypes();
+ for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) {
+ if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second))
+ Table.remove(I++);
+ else {
+ std::set<const Type *>::iterator T = Types.find(I->second);
+ if (T==Types.end())
+ Table.remove(I++);
+ else {
+ Types.erase(T);
+ ++I;
+ }
+ }
+ }
+ // Find unnamed types.
+ unsigned RenameCounter = 0;
+ for (std::set<const Type *>::const_iterator I = Types.begin(),
+ E = Types.end(); I!=E; ++I)
+ if (const StructType *STy = dyn_cast<StructType>(*I)) {
+ while (ModulePtr->addTypeName("unnamed$"+utostr(RenameCounter), STy))
+ ++RenameCounter;
+ Changed = true;
+ }
+ // Pointer for FunctionPass.
+ UsedTypes = &getAnalysis<FindUsedTypes>().getTypes();
+ return Changed;
+}
+
+char MSILModule::ID = 0;
+char MSILWriter::ID = 0;
+
+bool MSILWriter::runOnFunction(Function &F) {
+ if (F.isDeclaration()) return false;
+
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ LInfo = &getAnalysis<LoopInfo>();
+ printFunction(F);
+ return false;
+}
+
+
+bool MSILWriter::doInitialization(Module &M) {
+ ModulePtr = &M;
+ Mang = new Mangler(M);
+ Out << ".assembly extern mscorlib {}\n";
+ Out << ".assembly MSIL {}\n\n";
+ Out << "// External\n";
+ printExternals();
+ Out << "// Declarations\n";
+ printDeclarations(M.getTypeSymbolTable());
+ Out << "// Definitions\n";
+ printGlobalVariables();
+ Out << "// Startup code\n";
+ printModuleStartup();
+ return false;
+}
+
+
+bool MSILWriter::doFinalization(Module &M) {
+ delete Mang;
+ return false;
+}
+
+
+void MSILWriter::printModuleStartup() {
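+ // Summary, for reference: the emitted $MSIL_Startup entry point marshals
+ // System.Environment::GetCommandLineArgs() into a C-style (argc, argv)
+ // pair, calls $MSIL_Init(), and then dispatches to the user's 'main' below
+ // when its signature is one of the supported forms.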
+ Out <<
+ ".method static public int32 $MSIL_Startup() {\n"
+ "\t.entrypoint\n"
+ "\t.locals (native int i)\n"
+ "\t.locals (native int argc)\n"
+ "\t.locals (native int ptr)\n"
+ "\t.locals (void* argv)\n"
+ "\t.locals (string[] args)\n"
+ "\tcall\tstring[] [mscorlib]System.Environment::GetCommandLineArgs()\n"
+ "\tdup\n"
+ "\tstloc\targs\n"
+ "\tldlen\n"
+ "\tconv.i4\n"
+ "\tdup\n"
+ "\tstloc\targc\n";
+ printPtrLoad(TD->getPointerSize());
+ Out <<
+ "\tmul\n"
+ "\tlocalloc\n"
+ "\tstloc\targv\n"
+ "\tldc.i4.0\n"
+ "\tstloc\ti\n"
+ "L_01:\n"
+ "\tldloc\ti\n"
+ "\tldloc\targc\n"
+ "\tceq\n"
+ "\tbrtrue\tL_02\n"
+ "\tldloc\targs\n"
+ "\tldloc\ti\n"
+ "\tldelem.ref\n"
+ "\tcall\tnative int [mscorlib]System.Runtime.InteropServices.Marshal::"
+ "StringToHGlobalAnsi(string)\n"
+ "\tstloc\tptr\n"
+ "\tldloc\targv\n"
+ "\tldloc\ti\n";
+ printPtrLoad(TD->getPointerSize());
+ Out <<
+ "\tmul\n"
+ "\tadd\n"
+ "\tldloc\tptr\n"
+ "\tstind.i\n"
+ "\tldloc\ti\n"
+ "\tldc.i4.1\n"
+ "\tadd\n"
+ "\tstloc\ti\n"
+ "\tbr\tL_01\n"
+ "L_02:\n"
+ "\tcall void $MSIL_Init()\n";
+
+ // Call user 'main' function.
+ const Function* F = ModulePtr->getFunction("main");
+ if (!F || F->isDeclaration()) {
+ Out << "\tldc.i4.0\n\tret\n}\n";
+ return;
+ }
+ bool BadSig = true;
+ std::string Args("");
+ Function::const_arg_iterator Arg1,Arg2;
+
+ switch (F->arg_size()) {
+ case 0:
+ BadSig = false;
+ break;
+ case 1:
+ Arg1 = F->arg_begin();
+ if (Arg1->getType()->isInteger()) {
+ Out << "\tldloc\targc\n";
+ Args = getTypeName(Arg1->getType());
+ BadSig = false;
+ }
+ break;
+ case 2:
+ Arg1 = Arg2 = F->arg_begin(); ++Arg2;
+ if (Arg1->getType()->isInteger() &&
+ Arg2->getType()->getTypeID() == Type::PointerTyID) {
+ Out << "\tldloc\targc\n\tldloc\targv\n";
+ Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType());
+ BadSig = false;
+ }
+ break;
+ default:
+ BadSig = true;
+ }
+
+ bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID);
+ if (BadSig || (!F->getReturnType()->isInteger() && !RetVoid)) {
+ Out << "\tldc.i4.0\n";
+ } else {
+ Out << "\tcall\t" << getTypeName(F->getReturnType()) <<
+ getConvModopt(F->getCallingConv()) << "main(" << Args << ")\n";
+ if (RetVoid)
+ Out << "\tldc.i4.0\n";
+ else
+ Out << "\tconv.i4\n";
+ }
+ Out << "\tret\n}\n";
+}
+
+bool MSILWriter::isZeroValue(const Value* V) {
+ if (const Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+
+std::string MSILWriter::getValueName(const Value* V) {
+ // Quoting the name allows control and space characters in it.
+ return "'"+Mang->getValueName(V)+"'";
+}
+
+
+std::string MSILWriter::getLabelName(const std::string& Name) {
+ if (Name.find('.')!=std::string::npos) {
+ std::string Tmp(Name);
+ // Replace unacceptable characters in the label name.
+ for (std::string::iterator I = Tmp.begin(), E = Tmp.end(); I!=E; ++I)
+ if (*I=='.') *I = '@';
+ return Tmp;
+ }
+ return Name;
+}
+
+
+std::string MSILWriter::getLabelName(const Value* V) {
+ return getLabelName(Mang->getValueName(V));
+}
+
+
+std::string MSILWriter::getConvModopt(unsigned CallingConvID) {
+ switch (CallingConvID) {
+ case CallingConv::C:
+ case CallingConv::Cold:
+ case CallingConv::Fast:
+ return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvCdecl) ";
+ case CallingConv::X86_FastCall:
+ return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) ";
+ case CallingConv::X86_StdCall:
+ return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) ";
+ default:
+ cerr << "CallingConvID = " << CallingConvID << '\n';
+ assert(0 && "Unsupported calling convention");
+ }
+ return ""; // Not reached
+}
+
+
+std::string MSILWriter::getArrayTypeName(Type::TypeID TyID, const Type* Ty) {
+ std::string Tmp = "";
+ const Type* ElemTy = Ty;
+ assert(Ty->getTypeID()==TyID && "Invalid type passed");
+ // Walk through the array element types.
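+ // (Illustration: a nested [4 x [3 x i32]] array comes out as "int32 [4,3]".)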
+ for (;;) {
+ // Multidimensional array.
+ if (ElemTy->getTypeID()==TyID) {
+ if (const ArrayType* ATy = dyn_cast<ArrayType>(ElemTy))
+ Tmp += utostr(ATy->getNumElements());
+ else if (const VectorType* VTy = dyn_cast<VectorType>(ElemTy))
+ Tmp += utostr(VTy->getNumElements());
+ ElemTy = cast<SequentialType>(ElemTy)->getElementType();
+ }
+ // Base element type found.
+ if (ElemTy->getTypeID()!=TyID) break;
+ Tmp += ",";
+ }
+ return getTypeName(ElemTy, false, true)+"["+Tmp+"]";
+}
+
+
+std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) {
+ unsigned NumBits = 0;
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID:
+ return "void ";
+ case Type::IntegerTyID:
+ NumBits = getBitWidth(Ty);
+ if(NumBits==1)
+ return "bool ";
+ if (!isSigned)
+ return "unsigned int"+utostr(NumBits)+" ";
+ return "int"+utostr(NumBits)+" ";
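+ // (e.g. a signed i32 -> "int32 ", an unsigned i16 -> "unsigned int16 ",
+ // and i1 -> "bool ".)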
+ case Type::FloatTyID:
+ return "float32 ";
+ case Type::DoubleTyID:
+ return "float64 ";
+ default:
+ cerr << "Type = " << *Ty << '\n';
+ assert(0 && "Invalid primitive type");
+ }
+ return ""; // Not reached
+}
+
+
+std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned,
+ bool isNested) {
+ if (Ty->isPrimitiveType() || Ty->isInteger())
+ return getPrimitiveTypeName(Ty,isSigned);
+ // FIXME: "OpaqueType" support
+ switch (Ty->getTypeID()) {
+ case Type::PointerTyID:
+ return "void* ";
+ case Type::StructTyID:
+ if (isNested)
+ return ModulePtr->getTypeName(Ty);
+ return "valuetype '"+ModulePtr->getTypeName(Ty)+"' ";
+ case Type::ArrayTyID:
+ if (isNested)
+ return getArrayTypeName(Ty->getTypeID(),Ty);
+ return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' ";
+ case Type::VectorTyID:
+ if (isNested)
+ return getArrayTypeName(Ty->getTypeID(),Ty);
+ return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' ";
+ default:
+ cerr << "Type = " << *Ty << '\n';
+ assert(0 && "Invalid type in getTypeName()");
+ }
+ return ""; // Not reached
+}
+
+
+MSILWriter::ValueType MSILWriter::getValueLocation(const Value* V) {
+ // Function argument
+ if (isa<Argument>(V))
+ return ArgumentVT;
+ // Function
+ else if (const Function* F = dyn_cast<Function>(V))
+ return F->hasLocalLinkage() ? InternalVT : GlobalVT;
+ // Variable
+ else if (const GlobalVariable* G = dyn_cast<GlobalVariable>(V))
+ return G->hasLocalLinkage() ? InternalVT : GlobalVT;
+ // Constant
+ else if (isa<Constant>(V))
+ return isa<ConstantExpr>(V) ? ConstExprVT : ConstVT;
+ // Local variable
+ return LocalVT;
+}
+
+
+std::string MSILWriter::getTypePostfix(const Type* Ty, bool Expand,
+ bool isSigned) {
+ unsigned NumBits = 0;
+ switch (Ty->getTypeID()) {
+ // Integer constant, expanding for stack operations.
+ case Type::IntegerTyID:
+ NumBits = getBitWidth(Ty);
+ // Expand integer value to "int32" or "int64".
+ if (Expand) return (NumBits<=32 ? "i4" : "i8");
+ if (NumBits==1) return "i1";
+ return (isSigned ? "i" : "u")+utostr(NumBits/8);
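+ // (e.g. i32 -> "i4" when expanded for the stack, an unsigned i16 -> "u2",
+ // and i1 -> "i1".)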
+ // Float constant.
+ case Type::FloatTyID:
+ return "r4";
+ case Type::DoubleTyID:
+ return "r8";
+ case Type::PointerTyID:
+ return "i"+utostr(TD->getTypeAllocSize(Ty));
+ default:
+ cerr << "TypeID = " << Ty->getTypeID() << '\n';
+ assert(0 && "Invalid type in TypeToPostfix()");
+ }
+ return ""; // Not reached
+}
+
+
+void MSILWriter::printConvToPtr() {
+ switch (ModulePtr->getPointerSize()) {
+ case Module::Pointer32:
+ printSimpleInstruction("conv.u4");
+ break;
+ case Module::Pointer64:
+ printSimpleInstruction("conv.u8");
+ break;
+ default:
+ assert(0 && "Module uses an unsupported pointer size");
+ }
+}
+
+
+void MSILWriter::printPtrLoad(uint64_t N) {
+ switch (ModulePtr->getPointerSize()) {
+ case Module::Pointer32:
+ printSimpleInstruction("ldc.i4",utostr(N).c_str());
+ // FIXME: Need overflow test?
+ if (!isUInt32(N)) {
+ cerr << "Value = " << utostr(N) << '\n';
+ assert(0 && "32-bit pointer overflowed");
+ }
+ break;
+ case Module::Pointer64:
+ printSimpleInstruction("ldc.i8",utostr(N).c_str());
+ break;
+ default:
+ assert(0 && "Module uses an unsupported pointer size");
+ }
+}
+
+
+void MSILWriter::printValuePtrLoad(const Value* V) {
+ printValueLoad(V);
+ printConvToPtr();
+}
+
+
+void MSILWriter::printConstLoad(const Constant* C) {
+ if (const ConstantInt* CInt = dyn_cast<ConstantInt>(C)) {
+ // Integer constant
+ Out << "\tldc." << getTypePostfix(C->getType(),true) << '\t';
+ if (CInt->isMinValue(true))
+ Out << CInt->getSExtValue();
+ else
+ Out << CInt->getZExtValue();
+ } else if (const ConstantFP* FP = dyn_cast<ConstantFP>(C)) {
+ // Float constant
+ uint64_t X;
+ unsigned Size;
+ if (FP->getType()->getTypeID()==Type::FloatTyID) {
+ X = (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue();
+ Size = 4;
+ } else {
+ X = FP->getValueAPF().bitcastToAPInt().getZExtValue();
+ Size = 8;
+ }
+ Out << "\tldc.r" << Size << "\t( " << utohexstr(X) << ')';
+ } else if (isa<UndefValue>(C)) {
+ // Undefined constant value = NULL.
+ printPtrLoad(0);
+ } else {
+ cerr << "Constant = " << *C << '\n';
+ assert(0 && "Invalid constant value");
+ }
+ Out << '\n';
+}
+
+
+void MSILWriter::printValueLoad(const Value* V) {
+ MSILWriter::ValueType Location = getValueLocation(V);
+ switch (Location) {
+ // Global variable or function address.
+ case GlobalVT:
+ case InternalVT:
+ if (const Function* F = dyn_cast<Function>(V)) {
+ std::string Name = getConvModopt(F->getCallingConv())+getValueName(F);
+ printSimpleInstruction("ldftn",
+ getCallSignature(F->getFunctionType(),NULL,Name).c_str());
+ } else {
+ std::string Tmp;
+ const Type* ElemTy = cast<PointerType>(V->getType())->getElementType();
+      if (Location==GlobalVT &&
+          cast<GlobalVariable>(V)->hasDLLImportLinkage()) {
+ Tmp = "void* "+getValueName(V);
+ printSimpleInstruction("ldsfld",Tmp.c_str());
+ } else {
+ Tmp = getTypeName(ElemTy)+getValueName(V);
+ printSimpleInstruction("ldsflda",Tmp.c_str());
+ }
+ }
+ break;
+ // Function argument.
+ case ArgumentVT:
+ printSimpleInstruction("ldarg",getValueName(V).c_str());
+ break;
+ // Local function variable.
+ case LocalVT:
+ printSimpleInstruction("ldloc",getValueName(V).c_str());
+ break;
+ // Constant value.
+ case ConstVT:
+ if (isa<ConstantPointerNull>(V))
+ printPtrLoad(0);
+ else
+ printConstLoad(cast<Constant>(V));
+ break;
+ // Constant expression.
+ case ConstExprVT:
+ printConstantExpr(cast<ConstantExpr>(V));
+ break;
+ default:
+ cerr << "Value = " << *V << '\n';
+ assert(0 && "Invalid value location");
+ }
+}
+
+
+void MSILWriter::printValueSave(const Value* V) {
+ switch (getValueLocation(V)) {
+ case ArgumentVT:
+ printSimpleInstruction("starg",getValueName(V).c_str());
+ break;
+ case LocalVT:
+ printSimpleInstruction("stloc",getValueName(V).c_str());
+ break;
+ default:
+ cerr << "Value = " << *V << '\n';
+ assert(0 && "Invalid value location");
+ }
+}
+
+
+void MSILWriter::printBinaryInstruction(const char* Name, const Value* Left,
+ const Value* Right) {
+ printValueLoad(Left);
+ printValueLoad(Right);
+ Out << '\t' << Name << '\n';
+}
+
+
+void MSILWriter::printSimpleInstruction(const char* Inst, const char* Operand) {
+  if (Operand)
+ Out << '\t' << Inst << '\t' << Operand << '\n';
+ else
+ Out << '\t' << Inst << '\n';
+}
+
+
+void MSILWriter::printPHICopy(const BasicBlock* Src, const BasicBlock* Dst) {
+ for (BasicBlock::const_iterator I = Dst->begin(), E = Dst->end();
+ isa<PHINode>(I); ++I) {
+ const PHINode* Phi = cast<PHINode>(I);
+ const Value* Val = Phi->getIncomingValueForBlock(Src);
+ if (isa<UndefValue>(Val)) continue;
+ printValueLoad(Val);
+ printValueSave(Phi);
+ }
+}
+
+
+void MSILWriter::printBranchToBlock(const BasicBlock* CurrBB,
+ const BasicBlock* TrueBB,
+ const BasicBlock* FalseBB) {
+ if (TrueBB==FalseBB) {
+    // "TrueBB" and "FalseBB" are the same destination.
+ printPHICopy(CurrBB,TrueBB);
+ printSimpleInstruction("pop");
+ printSimpleInstruction("br",getLabelName(TrueBB).c_str());
+ } else if (FalseBB==NULL) {
+    // If "FalseBB" is not used, the jump is conditional.
+ printPHICopy(CurrBB,TrueBB);
+ printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str());
+ } else if (TrueBB==NULL) {
+    // If "TrueBB" is not used, the jump is unconditional.
+ printPHICopy(CurrBB,FalseBB);
+ printSimpleInstruction("br",getLabelName(FalseBB).c_str());
+ } else {
+ // Copy PHI instructions for each block
+ std::string TmpLabel;
+    // Branch to "TrueBB", via a temporary label if it has PHI nodes.
+ if (isa<PHINode>(TrueBB->begin())) {
+ TmpLabel = getLabelName(TrueBB)+"$phi_"+utostr(getUniqID());
+ printSimpleInstruction("brtrue",TmpLabel.c_str());
+ } else {
+ printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str());
+ }
+    // Copy PHI values for "FalseBB" (if any) and branch to it.
+ if (isa<PHINode>(FalseBB->begin())) {
+ printPHICopy(CurrBB,FalseBB);
+ printSimpleInstruction("br",getLabelName(FalseBB).c_str());
+ } else {
+ printSimpleInstruction("br",getLabelName(FalseBB).c_str());
+ }
+ if (isa<PHINode>(TrueBB->begin())) {
+ // Handle "TrueBB" PHI Copy
+ Out << TmpLabel << ":\n";
+ printPHICopy(CurrBB,TrueBB);
+ printSimpleInstruction("br",getLabelName(TrueBB).c_str());
+ }
+ }
+}
+
+
+void MSILWriter::printBranchInstruction(const BranchInst* Inst) {
+ if (Inst->isUnconditional()) {
+ printBranchToBlock(Inst->getParent(),NULL,Inst->getSuccessor(0));
+ } else {
+ printValueLoad(Inst->getCondition());
+ printBranchToBlock(Inst->getParent(),Inst->getSuccessor(0),
+ Inst->getSuccessor(1));
+ }
+}
+
+
+void MSILWriter::printSelectInstruction(const Value* Cond, const Value* VTrue,
+ const Value* VFalse) {
+ std::string TmpLabel = std::string("select$true_")+utostr(getUniqID());
+ printValueLoad(VTrue);
+ printValueLoad(Cond);
+ printSimpleInstruction("brtrue",TmpLabel.c_str());
+ printSimpleInstruction("pop");
+ printValueLoad(VFalse);
+ Out << TmpLabel << ":\n";
+}
+
+
+void MSILWriter::printIndirectLoad(const Value* V) {
+ const Type* Ty = V->getType();
+ printValueLoad(V);
+ if (const PointerType* P = dyn_cast<PointerType>(Ty))
+ Ty = P->getElementType();
+ std::string Tmp = "ldind."+getTypePostfix(Ty, false);
+ printSimpleInstruction(Tmp.c_str());
+}
+
+
+void MSILWriter::printIndirectSave(const Value* Ptr, const Value* Val) {
+ printValueLoad(Ptr);
+ printValueLoad(Val);
+ printIndirectSave(Val->getType());
+}
+
+
+void MSILWriter::printIndirectSave(const Type* Ty) {
+  // The "stind" instruction needs a signed postfix for any type.
+ std::string postfix = getTypePostfix(Ty, false);
+ if (*postfix.begin()=='u') *postfix.begin() = 'i';
+ postfix = "stind."+postfix;
+ printSimpleInstruction(postfix.c_str());
+}
+
+
+void MSILWriter::printCastInstruction(unsigned int Op, const Value* V,
+ const Type* Ty) {
+ std::string Tmp("");
+ printValueLoad(V);
+ switch (Op) {
+ // Signed
+ case Instruction::SExt:
+ case Instruction::SIToFP:
+ case Instruction::FPToSI:
+ Tmp = "conv."+getTypePostfix(Ty,false,true);
+ printSimpleInstruction(Tmp.c_str());
+ break;
+ // Unsigned
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::FPToUI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ Tmp = "conv."+getTypePostfix(Ty,false);
+ printSimpleInstruction(Tmp.c_str());
+ break;
+ // Do nothing
+ case Instruction::BitCast:
+    // FIXME: assumes that ld*/st* instructions do not change the data format.
+ break;
+ default:
+ cerr << "Opcode = " << Op << '\n';
+ assert(0 && "Invalid conversion instruction");
+ }
+}
+
+
+void MSILWriter::printGepInstruction(const Value* V, gep_type_iterator I,
+ gep_type_iterator E) {
+ unsigned Size;
+ // Load address
+ printValuePtrLoad(V);
+ // Calculate element offset.
+ for (; I!=E; ++I){
+ Size = 0;
+ const Value* IndexValue = I.getOperand();
+ if (const StructType* StrucTy = dyn_cast<StructType>(*I)) {
+ uint64_t FieldIndex = cast<ConstantInt>(IndexValue)->getZExtValue();
+ // Offset is the sum of all previous structure fields.
+ for (uint64_t F = 0; F<FieldIndex; ++F)
+ Size += TD->getTypeAllocSize(StrucTy->getContainedType((unsigned)F));
+ printPtrLoad(Size);
+ printSimpleInstruction("add");
+ continue;
+ } else if (const SequentialType* SeqTy = dyn_cast<SequentialType>(*I)) {
+ Size = TD->getTypeAllocSize(SeqTy->getElementType());
+ } else {
+ Size = TD->getTypeAllocSize(*I);
+ }
+ // Add offset of current element to stack top.
+ if (!isZeroValue(IndexValue)) {
+ // Constant optimization.
+ if (const ConstantInt* C = dyn_cast<ConstantInt>(IndexValue)) {
+ if (C->getValue().isNegative()) {
+ printPtrLoad(C->getValue().abs().getZExtValue()*Size);
+ printSimpleInstruction("sub");
+ continue;
+ } else
+ printPtrLoad(C->getZExtValue()*Size);
+ } else {
+ printPtrLoad(Size);
+ printValuePtrLoad(IndexValue);
+ printSimpleInstruction("mul");
+ }
+ printSimpleInstruction("add");
+ }
+ }
+}
+
+
+std::string MSILWriter::getCallSignature(const FunctionType* Ty,
+ const Instruction* Inst,
+ std::string Name) {
+ std::string Tmp("");
+ if (Ty->isVarArg()) Tmp += "vararg ";
+ // Name and return type.
+ Tmp += getTypeName(Ty->getReturnType())+Name+"(";
+ // Function argument type list.
+ unsigned NumParams = Ty->getNumParams();
+ for (unsigned I = 0; I!=NumParams; ++I) {
+ if (I!=0) Tmp += ",";
+ Tmp += getTypeName(Ty->getParamType(I));
+ }
+  // The CLR needs to know the exact number of parameters received by a vararg
+  // function, because the caller cleans up the stack.
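+  // Illustrative example (names are made up): a call through a vararg
+  // prototype "i32 (i8*, ...)" passing one extra i32 would produce a
+  // signature roughly like: vararg int32 'printf'(void* , ... , int32 )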
+ if (Ty->isVarArg() && Inst) {
+    // Offset of the first argument operand in "CallInst" or "InvokeInst".
+ unsigned Org = isa<InvokeInst>(Inst) ? 3 : 1;
+ // Print variable argument types.
+ unsigned NumOperands = Inst->getNumOperands()-Org;
+ if (NumParams<NumOperands) {
+ if (NumParams!=0) Tmp += ", ";
+ Tmp += "... , ";
+ for (unsigned J = NumParams; J!=NumOperands; ++J) {
+ if (J!=NumParams) Tmp += ", ";
+ Tmp += getTypeName(Inst->getOperand(J+Org)->getType());
+ }
+ }
+ }
+ return Tmp+")";
+}
+
+
+void MSILWriter::printFunctionCall(const Value* FnVal,
+ const Instruction* Inst) {
+ // Get function calling convention.
+ std::string Name = "";
+ if (const CallInst* Call = dyn_cast<CallInst>(Inst))
+ Name = getConvModopt(Call->getCallingConv());
+ else if (const InvokeInst* Invoke = dyn_cast<InvokeInst>(Inst))
+ Name = getConvModopt(Invoke->getCallingConv());
+ else {
+ cerr << "Instruction = " << Inst->getName() << '\n';
+    assert(0 && "Expected a \"Call\" or \"Invoke\" instruction");
+ }
+ if (const Function* F = dyn_cast<Function>(FnVal)) {
+ // Direct call.
+ Name += getValueName(F);
+ printSimpleInstruction("call",
+ getCallSignature(F->getFunctionType(),Inst,Name).c_str());
+ } else {
+ // Indirect function call.
+ const PointerType* PTy = cast<PointerType>(FnVal->getType());
+ const FunctionType* FTy = cast<FunctionType>(PTy->getElementType());
+ // Load function address.
+ printValueLoad(FnVal);
+ printSimpleInstruction("calli",getCallSignature(FTy,Inst,Name).c_str());
+ }
+}
+
+
+void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
+ std::string Name;
+ switch (Inst->getIntrinsicID()) {
+ case Intrinsic::vastart:
+ Name = getValueName(Inst->getOperand(1));
+ Name.insert(Name.length()-1,"$valist");
+ // Obtain the argument handle.
+ printSimpleInstruction("ldloca",Name.c_str());
+ printSimpleInstruction("arglist");
+ printSimpleInstruction("call",
+ "instance void [mscorlib]System.ArgIterator::.ctor"
+ "(valuetype [mscorlib]System.RuntimeArgumentHandle)");
+ // Save as pointer type "void*"
+ printValueLoad(Inst->getOperand(1));
+ printSimpleInstruction("ldloca",Name.c_str());
+ printIndirectSave(PointerType::getUnqual(IntegerType::get(8)));
+ break;
+ case Intrinsic::vaend:
+ // Close argument list handle.
+ printIndirectLoad(Inst->getOperand(1));
+    printSimpleInstruction("call",
+      "instance void [mscorlib]System.ArgIterator::End()");
+ break;
+ case Intrinsic::vacopy:
+ // Copy "ArgIterator" valuetype.
+ printIndirectLoad(Inst->getOperand(1));
+ printIndirectLoad(Inst->getOperand(2));
+ printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator");
+ break;
+ default:
+ cerr << "Intrinsic ID = " << Inst->getIntrinsicID() << '\n';
+ assert(0 && "Invalid intrinsic function");
+ }
+}
+
+
+void MSILWriter::printCallInstruction(const Instruction* Inst) {
+ if (isa<IntrinsicInst>(Inst)) {
+ // Handle intrinsic function.
+ printIntrinsicCall(cast<IntrinsicInst>(Inst));
+ } else {
+ // Load arguments to stack and call function.
+    for (unsigned I = 1, E = Inst->getNumOperands(); I!=E; ++I)
+ printValueLoad(Inst->getOperand(I));
+ printFunctionCall(Inst->getOperand(0),Inst);
+ }
+}
+
+
+void MSILWriter::printICmpInstruction(unsigned Predicate, const Value* Left,
+ const Value* Right) {
+ switch (Predicate) {
+ case ICmpInst::ICMP_EQ:
+ printBinaryInstruction("ceq",Left,Right);
+ break;
+ case ICmpInst::ICMP_NE:
+    // Emulate = not(neg(Op1 eq Op2))
+ printBinaryInstruction("ceq",Left,Right);
+ printSimpleInstruction("neg");
+ printSimpleInstruction("not");
+ break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE:
+ // Emulate = (Op1 eq Op2) or (Op1 lt Op2)
+ printBinaryInstruction("ceq",Left,Right);
+ if (Predicate==ICmpInst::ICMP_ULE)
+ printBinaryInstruction("clt.un",Left,Right);
+ else
+ printBinaryInstruction("clt",Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE:
+ // Emulate = (Op1 eq Op2) or (Op1 gt Op2)
+ printBinaryInstruction("ceq",Left,Right);
+ if (Predicate==ICmpInst::ICMP_UGE)
+ printBinaryInstruction("cgt.un",Left,Right);
+ else
+ printBinaryInstruction("cgt",Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case ICmpInst::ICMP_ULT:
+ printBinaryInstruction("clt.un",Left,Right);
+ break;
+ case ICmpInst::ICMP_SLT:
+ printBinaryInstruction("clt",Left,Right);
+ break;
+ case ICmpInst::ICMP_UGT:
+    printBinaryInstruction("cgt.un",Left,Right);
+    break;
+ case ICmpInst::ICMP_SGT:
+ printBinaryInstruction("cgt",Left,Right);
+ break;
+ default:
+ cerr << "Predicate = " << Predicate << '\n';
+ assert(0 && "Invalid icmp predicate");
+ }
+}
+
+
+void MSILWriter::printFCmpInstruction(unsigned Predicate, const Value* Left,
+ const Value* Right) {
+ // FIXME: Correct comparison
+ std::string NanFunc = "bool [mscorlib]System.Double::IsNaN(float64)";
+ switch (Predicate) {
+ case FCmpInst::FCMP_UGT:
+ // X > Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("cgt",Left,Right);
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OGT:
+ // X > Y
+ printBinaryInstruction("cgt",Left,Right);
+ break;
+ case FCmpInst::FCMP_UGE:
+ // X >= Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("ceq",Left,Right);
+ printBinaryInstruction("cgt",Left,Right);
+ printSimpleInstruction("or");
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OGE:
+ // X >= Y
+ printBinaryInstruction("ceq",Left,Right);
+ printBinaryInstruction("cgt",Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_ULT:
+ // X < Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("clt",Left,Right);
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OLT:
+ // X < Y
+ printBinaryInstruction("clt",Left,Right);
+ break;
+ case FCmpInst::FCMP_ULE:
+ // X <= Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("ceq",Left,Right);
+ printBinaryInstruction("clt",Left,Right);
+ printSimpleInstruction("or");
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OLE:
+ // X <= Y
+ printBinaryInstruction("ceq",Left,Right);
+ printBinaryInstruction("clt",Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_UEQ:
+ // X == Y || llvm_fcmp_uno(X, Y)
+ printBinaryInstruction("ceq",Left,Right);
+ printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_OEQ:
+ // X == Y
+ printBinaryInstruction("ceq",Left,Right);
+ break;
+ case FCmpInst::FCMP_UNE:
+ // X != Y
+ printBinaryInstruction("ceq",Left,Right);
+ printSimpleInstruction("neg");
+ printSimpleInstruction("not");
+ break;
+ case FCmpInst::FCMP_ONE:
+ // X != Y && llvm_fcmp_ord(X, Y)
+ printBinaryInstruction("ceq",Left,Right);
+ printSimpleInstruction("not");
+ break;
+ case FCmpInst::FCMP_ORD:
+ // return X == X && Y == Y
+ printBinaryInstruction("ceq",Left,Left);
+ printBinaryInstruction("ceq",Right,Right);
+ printSimpleInstruction("or");
+ break;
+ case FCmpInst::FCMP_UNO:
+ // X != X || Y != Y
+ printBinaryInstruction("ceq",Left,Left);
+ printSimpleInstruction("not");
+ printBinaryInstruction("ceq",Right,Right);
+ printSimpleInstruction("not");
+ printSimpleInstruction("or");
+ break;
+ default:
+ assert(0 && "Illegal FCmp predicate");
+ }
+}
+
+
+void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) {
+ std::string Label = "leave$normal_"+utostr(getUniqID());
+ Out << ".try {\n";
+ // Load arguments
+  for (unsigned I = 3, E = Inst->getNumOperands(); I!=E; ++I)
+ printValueLoad(Inst->getOperand(I));
+ // Print call instruction
+ printFunctionCall(Inst->getOperand(0),Inst);
+ // Save function result and leave "try" block
+ printValueSave(Inst);
+ printSimpleInstruction("leave",Label.c_str());
+ Out << "}\n";
+ Out << "catch [mscorlib]System.Exception {\n";
+ // Redirect to unwind block
+ printSimpleInstruction("pop");
+ printBranchToBlock(Inst->getParent(),NULL,Inst->getUnwindDest());
+ Out << "}\n" << Label << ":\n";
+ // Redirect to continue block
+ printBranchToBlock(Inst->getParent(),NULL,Inst->getNormalDest());
+}
+
+
+void MSILWriter::printSwitchInstruction(const SwitchInst* Inst) {
+  // FIXME: Emulate with the native IL "switch" instruction instead.
+ // Emulate = if () else if () else if () else ...
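+  // Rough sketch of the currently emitted pattern (labels are illustrative):
+  //   <load condition> <load case value> ceq brtrue case$label   (per case)
+  //   br default$label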
+ for (unsigned int I = 1, E = Inst->getNumCases(); I!=E; ++I) {
+ printValueLoad(Inst->getCondition());
+ printValueLoad(Inst->getCaseValue(I));
+ printSimpleInstruction("ceq");
+    // Conditional jump to the successor block.
+ printBranchToBlock(Inst->getParent(),Inst->getSuccessor(I),NULL);
+ }
+ // Jump to default block
+ printBranchToBlock(Inst->getParent(),NULL,Inst->getDefaultDest());
+}
+
+
+void MSILWriter::printVAArgInstruction(const VAArgInst* Inst) {
+ printIndirectLoad(Inst->getOperand(0));
+ printSimpleInstruction("call",
+ "instance typedref [mscorlib]System.ArgIterator::GetNextArg()");
+ printSimpleInstruction("refanyval","void*");
+ std::string Name =
+ "ldind."+getTypePostfix(PointerType::getUnqual(IntegerType::get(8)),false);
+ printSimpleInstruction(Name.c_str());
+}
+
+
+void MSILWriter::printAllocaInstruction(const AllocaInst* Inst) {
+ uint64_t Size = TD->getTypeAllocSize(Inst->getAllocatedType());
+ // Constant optimization.
+ if (const ConstantInt* CInt = dyn_cast<ConstantInt>(Inst->getOperand(0))) {
+ printPtrLoad(CInt->getZExtValue()*Size);
+ } else {
+ printPtrLoad(Size);
+ printValueLoad(Inst->getOperand(0));
+ printSimpleInstruction("mul");
+ }
+ printSimpleInstruction("localloc");
+}
+
+
+void MSILWriter::printInstruction(const Instruction* Inst) {
+ const Value *Left = 0, *Right = 0;
+ if (Inst->getNumOperands()>=1) Left = Inst->getOperand(0);
+ if (Inst->getNumOperands()>=2) Right = Inst->getOperand(1);
+ // Print instruction
+ // FIXME: "ShuffleVector","ExtractElement","InsertElement" support.
+ switch (Inst->getOpcode()) {
+ // Terminator
+ case Instruction::Ret:
+    if (Inst->getNumOperands())
+      printValueLoad(Left);
+    printSimpleInstruction("ret");
+ break;
+ case Instruction::Br:
+ printBranchInstruction(cast<BranchInst>(Inst));
+ break;
+ // Binary
+ case Instruction::Add:
+ printBinaryInstruction("add",Left,Right);
+ break;
+ case Instruction::Sub:
+ printBinaryInstruction("sub",Left,Right);
+ break;
+ case Instruction::Mul:
+ printBinaryInstruction("mul",Left,Right);
+ break;
+ case Instruction::UDiv:
+ printBinaryInstruction("div.un",Left,Right);
+ break;
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ printBinaryInstruction("div",Left,Right);
+ break;
+ case Instruction::URem:
+ printBinaryInstruction("rem.un",Left,Right);
+ break;
+ case Instruction::SRem:
+ case Instruction::FRem:
+ printBinaryInstruction("rem",Left,Right);
+ break;
+ // Binary Condition
+ case Instruction::ICmp:
+ printICmpInstruction(cast<ICmpInst>(Inst)->getPredicate(),Left,Right);
+ break;
+ case Instruction::FCmp:
+ printFCmpInstruction(cast<FCmpInst>(Inst)->getPredicate(),Left,Right);
+ break;
+ // Bitwise Binary
+ case Instruction::And:
+ printBinaryInstruction("and",Left,Right);
+ break;
+ case Instruction::Or:
+ printBinaryInstruction("or",Left,Right);
+ break;
+ case Instruction::Xor:
+ printBinaryInstruction("xor",Left,Right);
+ break;
+ case Instruction::Shl:
+ printValueLoad(Left);
+ printValueLoad(Right);
+ printSimpleInstruction("conv.i4");
+ printSimpleInstruction("shl");
+ break;
+ case Instruction::LShr:
+ printValueLoad(Left);
+ printValueLoad(Right);
+ printSimpleInstruction("conv.i4");
+ printSimpleInstruction("shr.un");
+ break;
+ case Instruction::AShr:
+ printValueLoad(Left);
+ printValueLoad(Right);
+ printSimpleInstruction("conv.i4");
+ printSimpleInstruction("shr");
+ break;
+ case Instruction::Select:
+    printSelectInstruction(Inst->getOperand(0),Inst->getOperand(1),
+                           Inst->getOperand(2));
+ break;
+ case Instruction::Load:
+ printIndirectLoad(Inst->getOperand(0));
+ break;
+ case Instruction::Store:
+ printIndirectSave(Inst->getOperand(1), Inst->getOperand(0));
+ break;
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ printCastInstruction(Inst->getOpcode(),Left,
+ cast<CastInst>(Inst)->getDestTy());
+ break;
+ case Instruction::GetElementPtr:
+ printGepInstruction(Inst->getOperand(0),gep_type_begin(Inst),
+ gep_type_end(Inst));
+ break;
+ case Instruction::Call:
+ printCallInstruction(cast<CallInst>(Inst));
+ break;
+ case Instruction::Invoke:
+ printInvokeInstruction(cast<InvokeInst>(Inst));
+ break;
+ case Instruction::Unwind:
+ printSimpleInstruction("newobj",
+ "instance void [mscorlib]System.Exception::.ctor()");
+ printSimpleInstruction("throw");
+ break;
+ case Instruction::Switch:
+ printSwitchInstruction(cast<SwitchInst>(Inst));
+ break;
+ case Instruction::Alloca:
+ printAllocaInstruction(cast<AllocaInst>(Inst));
+ break;
+ case Instruction::Malloc:
+ assert(0 && "LowerAllocationsPass used");
+ break;
+ case Instruction::Free:
+ assert(0 && "LowerAllocationsPass used");
+ break;
+ case Instruction::Unreachable:
+ printSimpleInstruction("ldstr", "\"Unreachable instruction\"");
+ printSimpleInstruction("newobj",
+ "instance void [mscorlib]System.Exception::.ctor(string)");
+ printSimpleInstruction("throw");
+ break;
+ case Instruction::VAArg:
+ printVAArgInstruction(cast<VAArgInst>(Inst));
+ break;
+ default:
+ cerr << "Instruction = " << Inst->getName() << '\n';
+ assert(0 && "Unsupported instruction");
+ }
+}
+
+
+void MSILWriter::printLoop(const Loop* L) {
+ Out << getLabelName(L->getHeader()->getName()) << ":\n";
+ const std::vector<BasicBlock*>& blocks = L->getBlocks();
+ for (unsigned I = 0, E = blocks.size(); I!=E; I++) {
+ BasicBlock* BB = blocks[I];
+ Loop* BBLoop = LInfo->getLoopFor(BB);
+ if (BBLoop == L)
+ printBasicBlock(BB);
+ else if (BB==BBLoop->getHeader() && BBLoop->getParentLoop()==L)
+ printLoop(BBLoop);
+ }
+ printSimpleInstruction("br",getLabelName(L->getHeader()->getName()).c_str());
+}
+
+
+void MSILWriter::printBasicBlock(const BasicBlock* BB) {
+ Out << getLabelName(BB) << ":\n";
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
+ const Instruction* Inst = I;
+    // Uncomment to emit the original LLVM instruction as a comment:
+ // Out << "\n//" << *Inst << "\n";
+ // Do not handle PHI instruction in current block
+ if (Inst->getOpcode()==Instruction::PHI) continue;
+ // Print instruction
+ printInstruction(Inst);
+ // Save result
+ if (Inst->getType()!=Type::VoidTy) {
+      // Do not save the value after invoke; it is done in the "try" block.
+ if (Inst->getOpcode()==Instruction::Invoke) continue;
+ printValueSave(Inst);
+ }
+ }
+}
+
+
+void MSILWriter::printLocalVariables(const Function& F) {
+ std::string Name;
+ const Type* Ty = NULL;
+ std::set<const Value*> Printed;
+ const Value* VaList = NULL;
+ unsigned StackDepth = 8;
+ // Find local variables
+ for (const_inst_iterator I = inst_begin(&F), E = inst_end(&F); I!=E; ++I) {
+ if (I->getOpcode()==Instruction::Call ||
+ I->getOpcode()==Instruction::Invoke) {
+ // Test stack depth.
+ if (StackDepth<I->getNumOperands())
+ StackDepth = I->getNumOperands();
+ }
+ const AllocaInst* AI = dyn_cast<AllocaInst>(&*I);
+ if (AI && !isa<GlobalVariable>(AI)) {
+ // Local variable allocation.
+ Ty = PointerType::getUnqual(AI->getAllocatedType());
+ Name = getValueName(AI);
+ Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
+ } else if (I->getType()!=Type::VoidTy) {
+ // Operation result.
+ Ty = I->getType();
+ Name = getValueName(&*I);
+ Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
+ }
+    // Check for a 'va_list' variable.
+ bool isVaList = false;
+ if (const VAArgInst* VaInst = dyn_cast<VAArgInst>(&*I)) {
+ // "va_list" as "va_arg" instruction operand.
+ isVaList = true;
+ VaList = VaInst->getOperand(0);
+ } else if (const IntrinsicInst* Inst = dyn_cast<IntrinsicInst>(&*I)) {
+ // "va_list" as intrinsic function operand.
+ switch (Inst->getIntrinsicID()) {
+ case Intrinsic::vastart:
+ case Intrinsic::vaend:
+ case Intrinsic::vacopy:
+ isVaList = true;
+ VaList = Inst->getOperand(1);
+ break;
+ default:
+ isVaList = false;
+ }
+ }
+ // Print "va_list" variable.
+ if (isVaList && Printed.insert(VaList).second) {
+ Name = getValueName(VaList);
+ Name.insert(Name.length()-1,"$valist");
+ Out << "\t.locals (valuetype [mscorlib]System.ArgIterator "
+ << Name << ")\n";
+ }
+ }
+ printSimpleInstruction(".maxstack",utostr(StackDepth*2).c_str());
+}
+
+
+void MSILWriter::printFunctionBody(const Function& F) {
+ // Print body
+ for (Function::const_iterator I = F.begin(), E = F.end(); I!=E; ++I) {
+ if (Loop *L = LInfo->getLoopFor(I)) {
+ if (L->getHeader()==I && L->getParentLoop()==0)
+ printLoop(L);
+ } else {
+ printBasicBlock(I);
+ }
+ }
+}
+
+
+void MSILWriter::printConstantExpr(const ConstantExpr* CE) {
+ const Value *left = 0, *right = 0;
+ if (CE->getNumOperands()>=1) left = CE->getOperand(0);
+ if (CE->getNumOperands()>=2) right = CE->getOperand(1);
+ // Print instruction
+ switch (CE->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ printCastInstruction(CE->getOpcode(),left,CE->getType());
+ break;
+ case Instruction::GetElementPtr:
+ printGepInstruction(CE->getOperand(0),gep_type_begin(CE),gep_type_end(CE));
+ break;
+ case Instruction::ICmp:
+ printICmpInstruction(CE->getPredicate(),left,right);
+ break;
+ case Instruction::FCmp:
+ printFCmpInstruction(CE->getPredicate(),left,right);
+ break;
+ case Instruction::Select:
+      printSelectInstruction(CE->getOperand(0),CE->getOperand(1),
+                             CE->getOperand(2));
+ break;
+ case Instruction::Add:
+ printBinaryInstruction("add",left,right);
+ break;
+ case Instruction::Sub:
+ printBinaryInstruction("sub",left,right);
+ break;
+ case Instruction::Mul:
+ printBinaryInstruction("mul",left,right);
+ break;
+ case Instruction::UDiv:
+ printBinaryInstruction("div.un",left,right);
+ break;
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ printBinaryInstruction("div",left,right);
+ break;
+ case Instruction::URem:
+ printBinaryInstruction("rem.un",left,right);
+ break;
+ case Instruction::SRem:
+ case Instruction::FRem:
+ printBinaryInstruction("rem",left,right);
+ break;
+ case Instruction::And:
+ printBinaryInstruction("and",left,right);
+ break;
+ case Instruction::Or:
+ printBinaryInstruction("or",left,right);
+ break;
+ case Instruction::Xor:
+ printBinaryInstruction("xor",left,right);
+ break;
+ case Instruction::Shl:
+ printBinaryInstruction("shl",left,right);
+ break;
+ case Instruction::LShr:
+ printBinaryInstruction("shr.un",left,right);
+ break;
+ case Instruction::AShr:
+ printBinaryInstruction("shr",left,right);
+ break;
+ default:
+ cerr << "Expression = " << *CE << "\n";
+ assert(0 && "Invalid constant expression");
+ }
+}
+
+
+void MSILWriter::printStaticInitializerList() {
+ // List of global variables with uninitialized fields.
+ for (std::map<const GlobalVariable*,std::vector<StaticInitializer> >::iterator
+ VarI = StaticInitList.begin(), VarE = StaticInitList.end(); VarI!=VarE;
+ ++VarI) {
+ const std::vector<StaticInitializer>& InitList = VarI->second;
+ if (InitList.empty()) continue;
+ // For each uninitialized field.
+ for (std::vector<StaticInitializer>::const_iterator I = InitList.begin(),
+ E = InitList.end(); I!=E; ++I) {
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(I->constant)) {
+ // Out << "\n// Init " << getValueName(VarI->first) << ", offset " <<
+ // utostr(I->offset) << ", type "<< *I->constant->getType() << "\n\n";
+ // Load variable address
+ printValueLoad(VarI->first);
+ // Add offset
+ if (I->offset!=0) {
+ printPtrLoad(I->offset);
+ printSimpleInstruction("add");
+ }
+ // Load value
+ printConstantExpr(CE);
+ // Save result at offset
+ std::string postfix = getTypePostfix(CE->getType(),true);
+ if (*postfix.begin()=='u') *postfix.begin() = 'i';
+ postfix = "stind."+postfix;
+ printSimpleInstruction(postfix.c_str());
+ } else {
+ cerr << "Constant = " << *I->constant << '\n';
+ assert(0 && "Invalid static initializer");
+ }
+ }
+ }
+}
+
+
+void MSILWriter::printFunction(const Function& F) {
+ bool isSigned = F.paramHasAttr(0, Attribute::SExt);
+ Out << "\n.method static ";
+ Out << (F.hasLocalLinkage() ? "private " : "public ");
+ if (F.isVarArg()) Out << "vararg ";
+ Out << getTypeName(F.getReturnType(),isSigned) <<
+ getConvModopt(F.getCallingConv()) << getValueName(&F) << '\n';
+ // Arguments
+ Out << "\t(";
+ unsigned ArgIdx = 1;
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I!=E;
+ ++I, ++ArgIdx) {
+ isSigned = F.paramHasAttr(ArgIdx, Attribute::SExt);
+ if (I!=F.arg_begin()) Out << ", ";
+ Out << getTypeName(I->getType(),isSigned) << getValueName(I);
+ }
+ Out << ") cil managed\n";
+ // Body
+ Out << "{\n";
+ printLocalVariables(F);
+ printFunctionBody(F);
+ Out << "}\n";
+}
+
+
+void MSILWriter::printDeclarations(const TypeSymbolTable& ST) {
+ std::string Name;
+ std::set<const Type*> Printed;
+ for (std::set<const Type*>::const_iterator
+ UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) {
+ const Type* Ty = *UI;
+ if (isa<ArrayType>(Ty) || isa<VectorType>(Ty) || isa<StructType>(Ty))
+ Name = getTypeName(Ty, false, true);
+    // Other types do not need to be declared.
+ else continue;
+    // Print each type only once.
+ if (Printed.insert(Ty).second) {
+ Out << ".class value explicit ansi sealed '" << Name << "'";
+ Out << " { .pack " << 1 << " .size " << TD->getTypeAllocSize(Ty);
+ Out << " }\n\n";
+ }
+ }
+}
+
+
+unsigned int MSILWriter::getBitWidth(const Type* Ty) {
+ unsigned int N = Ty->getPrimitiveSizeInBits();
+ assert(N!=0 && "Invalid type in getBitWidth()");
+ switch (N) {
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ return N;
+ default:
+ cerr << "Bits = " << N << '\n';
+ assert(0 && "Unsupported integer width");
+ }
+ return 0; // Not reached
+}
+
+
+void MSILWriter::printStaticConstant(const Constant* C, uint64_t& Offset) {
+ uint64_t TySize = 0;
+ const Type* Ty = C->getType();
+ // Print zero initialized constant.
+ if (isa<ConstantAggregateZero>(C) || C->isNullValue()) {
+ TySize = TD->getTypeAllocSize(C->getType());
+ Offset += TySize;
+ Out << "int8 (0) [" << TySize << "]";
+ return;
+ }
+ // Print constant initializer
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ TySize = TD->getTypeAllocSize(Ty);
+ const ConstantInt* Int = cast<ConstantInt>(C);
+ Out << getPrimitiveTypeName(Ty,true) << "(" << Int->getSExtValue() << ")";
+ break;
+ }
+ case Type::FloatTyID:
+ case Type::DoubleTyID: {
+ TySize = TD->getTypeAllocSize(Ty);
+ const ConstantFP* FP = cast<ConstantFP>(C);
+ if (Ty->getTypeID() == Type::FloatTyID)
+ Out << "int32 (" <<
+ (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')';
+ else
+ Out << "int64 (" <<
+ FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')';
+ break;
+ }
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ case Type::StructTyID:
+ for (unsigned I = 0, E = C->getNumOperands(); I<E; I++) {
+ if (I!=0) Out << ",\n";
+ printStaticConstant(C->getOperand(I),Offset);
+ }
+ break;
+ case Type::PointerTyID:
+ TySize = TD->getTypeAllocSize(C->getType());
+ // Initialize with global variable address
+ if (const GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
+ std::string name = getValueName(G);
+ Out << "&(" << name.insert(name.length()-1,"$data") << ")";
+ } else {
+ // Dynamic initialization
+ if (!isa<ConstantPointerNull>(C) && !C->isNullValue())
+ InitListPtr->push_back(StaticInitializer(C,Offset));
+ // Null pointer initialization
+ if (TySize==4) Out << "int32 (0)";
+ else if (TySize==8) Out << "int64 (0)";
+ else assert(0 && "Invalid pointer size");
+ }
+ break;
+ default:
+ cerr << "TypeID = " << Ty->getTypeID() << '\n';
+ assert(0 && "Invalid type in printStaticConstant()");
+ }
+ // Increase offset.
+ Offset += TySize;
+}
+
+
+void MSILWriter::printStaticInitializer(const Constant* C,
+ const std::string& Name) {
+ switch (C->getType()->getTypeID()) {
+ case Type::IntegerTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ Out << getPrimitiveTypeName(C->getType(), false);
+ break;
+ case Type::ArrayTyID:
+ case Type::VectorTyID:
+ case Type::StructTyID:
+ case Type::PointerTyID:
+ Out << getTypeName(C->getType());
+ break;
+ default:
+ cerr << "Type = " << *C << "\n";
+ assert(0 && "Invalid constant type");
+ }
+ // Print initializer
+ std::string label = Name;
+ label.insert(label.length()-1,"$data");
+ Out << Name << " at " << label << '\n';
+ Out << ".data " << label << " = {\n";
+ uint64_t offset = 0;
+ printStaticConstant(C,offset);
+ Out << "\n}\n\n";
+}
+
+
+void MSILWriter::printVariableDefinition(const GlobalVariable* G) {
+ const Constant* C = G->getInitializer();
+ if (C->isNullValue() || isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
+ InitListPtr = 0;
+ else
+ InitListPtr = &StaticInitList[G];
+ printStaticInitializer(C,getValueName(G));
+}
+
+
+void MSILWriter::printGlobalVariables() {
+ if (ModulePtr->global_empty()) return;
+ Module::global_iterator I,E;
+ for (I = ModulePtr->global_begin(), E = ModulePtr->global_end(); I!=E; ++I) {
+ // Variable definition
+ Out << ".field static " << (I->isDeclaration() ? "public " :
+ "private ");
+ if (I->isDeclaration()) {
+ Out << getTypeName(I->getType()) << getValueName(&*I) << "\n\n";
+ } else
+ printVariableDefinition(&*I);
+ }
+}
+
+
+const char* MSILWriter::getLibraryName(const Function* F) {
+ return getLibraryForSymbol(F->getName().c_str(), true, F->getCallingConv());
+}
+
+
+const char* MSILWriter::getLibraryName(const GlobalVariable* GV) {
+ return getLibraryForSymbol(Mang->getValueName(GV).c_str(), false, 0);
+}
+
+
+const char* MSILWriter::getLibraryForSymbol(const char* Name, bool isFunction,
+ unsigned CallingConv) {
+ // TODO: Read *.def file with function and libraries definitions.
+ return "MSVCRT.DLL";
+}
+
+
+void MSILWriter::printExternals() {
+ Module::const_iterator I,E;
+ // Functions.
+ for (I=ModulePtr->begin(),E=ModulePtr->end(); I!=E; ++I) {
+    // Skip intrinsics.
+ if (I->isIntrinsic()) continue;
+ if (I->isDeclaration()) {
+ const Function* F = I;
+ std::string Name = getConvModopt(F->getCallingConv())+getValueName(F);
+ std::string Sig =
+ getCallSignature(cast<FunctionType>(F->getFunctionType()), NULL, Name);
+ Out << ".method static hidebysig pinvokeimpl(\""
+ << getLibraryName(F) << "\")\n\t" << Sig << " preservesig {}\n\n";
+ }
+ }
+ // External variables and static initialization.
+ Out <<
+ ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)"
+ " native int LoadLibrary(string) preservesig {}\n"
+ ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)"
+ " native int GetProcAddress(native int, string) preservesig {}\n";
+ Out <<
+ ".method private static void* $MSIL_Import(string lib,string sym)\n"
+ " managed cil\n{\n"
+ "\tldarg\tlib\n"
+ "\tcall\tnative int LoadLibrary(string)\n"
+ "\tldarg\tsym\n"
+ "\tcall\tnative int GetProcAddress(native int,string)\n"
+ "\tdup\n"
+ "\tbrtrue\tL_01\n"
+    "\tldstr\t\"Cannot import variable\"\n"
+ "\tnewobj\tinstance void [mscorlib]System.Exception::.ctor(string)\n"
+ "\tthrow\n"
+ "L_01:\n"
+ "\tret\n"
+ "}\n\n"
+ ".method static private void $MSIL_Init() managed cil\n{\n";
+ printStaticInitializerList();
+  // For each global variable.
+ for (Module::global_iterator I = ModulePtr->global_begin(),
+ E = ModulePtr->global_end(); I!=E; ++I) {
+ if (!I->isDeclaration() || !I->hasDLLImportLinkage()) continue;
+    // Use "LoadLibrary"/"GetProcAddress" to retrieve the variable address.
+ std::string Label = "not_null$_"+utostr(getUniqID());
+ std::string Tmp = getTypeName(I->getType())+getValueName(&*I);
+ printSimpleInstruction("ldsflda",Tmp.c_str());
+ Out << "\tldstr\t\"" << getLibraryName(&*I) << "\"\n";
+ Out << "\tldstr\t\"" << Mang->getValueName(&*I) << "\"\n";
+ printSimpleInstruction("call","void* $MSIL_Import(string,string)");
+ printIndirectSave(I->getType());
+ }
+ printSimpleInstruction("ret");
+ Out << "}\n\n";
+}
+
+
+//===----------------------------------------------------------------------===//
+// External Interface declaration
+//===----------------------------------------------------------------------===//
+
+bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel)
+{
+ if (FileType != TargetMachine::AssemblyFile) return true;
+ MSILWriter* Writer = new MSILWriter(o);
+ PM.add(createGCLoweringPass());
+ PM.add(createLowerAllocationsPass(true));
+  // FIXME: Handle switch through the native IL "switch" instruction.
+ PM.add(createLowerSwitchPass());
+ PM.add(createCFGSimplificationPass());
+ PM.add(new MSILModule(Writer->UsedTypes,Writer->TD));
+ PM.add(Writer);
+ PM.add(createGCInfoDeleter());
+ return false;
+}
diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h
new file mode 100644
index 000000000000..45f5579bfb34
--- /dev/null
+++ b/lib/Target/MSIL/MSILWriter.h
@@ -0,0 +1,255 @@
+//===-- MSILWriter.h - MSIL writer declarations -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSILWriter pass that implements the MSIL backend.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MSILWRITER_H
+#define MSILWRITER_H
+
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Support/Mangler.h"
+#include <ios>
+using namespace llvm;
+
+namespace {
+
+ class MSILModule : public ModulePass {
+ Module *ModulePtr;
+ const std::set<const Type *>*& UsedTypes;
+ const TargetData*& TD;
+
+ public:
+ static char ID;
+ MSILModule(const std::set<const Type *>*& _UsedTypes,
+ const TargetData*& _TD)
+ : ModulePass(&ID), UsedTypes(_UsedTypes), TD(_TD) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<FindUsedTypes>();
+ AU.addRequired<TargetData>();
+ }
+
+ virtual const char *getPassName() const {
+ return "MSIL backend definitions";
+ }
+
+ virtual bool runOnModule(Module &M);
+
+ };
+
+ class MSILWriter : public FunctionPass {
+ struct StaticInitializer {
+ const Constant* constant;
+ uint64_t offset;
+
+ StaticInitializer()
+ : constant(0), offset(0) {}
+
+ StaticInitializer(const Constant* _constant, uint64_t _offset)
+ : constant(_constant), offset(_offset) {}
+ };
+
+ uint64_t UniqID;
+
+ uint64_t getUniqID() {
+ return ++UniqID;
+ }
+
+ public:
+ raw_ostream &Out;
+ Module* ModulePtr;
+ const TargetData* TD;
+ Mangler* Mang;
+ LoopInfo *LInfo;
+ std::vector<StaticInitializer>* InitListPtr;
+ std::map<const GlobalVariable*,std::vector<StaticInitializer> >
+ StaticInitList;
+ const std::set<const Type *>* UsedTypes;
+ static char ID;
+ MSILWriter(raw_ostream &o) : FunctionPass(&ID), Out(o) {
+ UniqID = 0;
+ }
+
+ enum ValueType {
+ UndefVT,
+ GlobalVT,
+ InternalVT,
+ ArgumentVT,
+ LocalVT,
+ ConstVT,
+ ConstExprVT
+ };
+
+ bool isVariable(ValueType V) {
+ return V==GlobalVT || V==InternalVT || V==ArgumentVT || V==LocalVT;
+ }
+
+ bool isConstValue(ValueType V) {
+ return V==ConstVT || V==ConstExprVT;
+ }
+
+ virtual const char *getPassName() const { return "MSIL backend"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F);
+
+ virtual bool doInitialization(Module &M);
+
+ virtual bool doFinalization(Module &M);
+
+ void printModuleStartup();
+
+ bool isZeroValue(const Value* V);
+
+ std::string getValueName(const Value* V);
+
+ std::string getLabelName(const Value* V);
+
+ std::string getLabelName(const std::string& Name);
+
+ std::string getConvModopt(unsigned CallingConvID);
+
+ std::string getArrayTypeName(Type::TypeID TyID, const Type* Ty);
+
+ std::string getPrimitiveTypeName(const Type* Ty, bool isSigned);
+
+ std::string getFunctionTypeName(const Type* Ty);
+
+ std::string getPointerTypeName(const Type* Ty);
+
+ std::string getTypeName(const Type* Ty, bool isSigned = false,
+ bool isNested = false);
+
+ ValueType getValueLocation(const Value* V);
+
+ std::string getTypePostfix(const Type* Ty, bool Expand,
+ bool isSigned = false);
+
+ void printConvToPtr();
+
+ void printPtrLoad(uint64_t N);
+
+ void printValuePtrLoad(const Value* V);
+
+ void printConstLoad(const Constant* C);
+
+ void printValueLoad(const Value* V);
+
+ void printValueSave(const Value* V);
+
+ void printBinaryInstruction(const char* Name, const Value* Left,
+ const Value* Right);
+
+ void printSimpleInstruction(const char* Inst, const char* Operand = NULL);
+
+ void printPHICopy(const BasicBlock* Src, const BasicBlock* Dst);
+
+ void printBranchToBlock(const BasicBlock* CurrBB,
+ const BasicBlock* TrueBB,
+ const BasicBlock* FalseBB);
+
+ void printBranchInstruction(const BranchInst* Inst);
+
+ void printSelectInstruction(const Value* Cond, const Value* VTrue,
+ const Value* VFalse);
+
+ void printIndirectLoad(const Value* V);
+
+ void printIndirectSave(const Value* Ptr, const Value* Val);
+
+ void printIndirectSave(const Type* Ty);
+
+ void printCastInstruction(unsigned int Op, const Value* V,
+ const Type* Ty);
+
+ void printGepInstruction(const Value* V, gep_type_iterator I,
+ gep_type_iterator E);
+
+ std::string getCallSignature(const FunctionType* Ty,
+ const Instruction* Inst,
+ std::string Name);
+
+ void printFunctionCall(const Value* FnVal, const Instruction* Inst);
+
+ void printIntrinsicCall(const IntrinsicInst* Inst);
+
+ void printCallInstruction(const Instruction* Inst);
+
+ void printICmpInstruction(unsigned Predicate, const Value* Left,
+ const Value* Right);
+
+ void printFCmpInstruction(unsigned Predicate, const Value* Left,
+ const Value* Right);
+
+ void printInvokeInstruction(const InvokeInst* Inst);
+
+ void printSwitchInstruction(const SwitchInst* Inst);
+
+ void printVAArgInstruction(const VAArgInst* Inst);
+
+ void printAllocaInstruction(const AllocaInst* Inst);
+
+ void printInstruction(const Instruction* Inst);
+
+ void printLoop(const Loop* L);
+
+ void printBasicBlock(const BasicBlock* BB);
+
+ void printLocalVariables(const Function& F);
+
+ void printFunctionBody(const Function& F);
+
+ void printConstantExpr(const ConstantExpr* CE);
+
+ void printStaticInitializerList();
+
+ void printFunction(const Function& F);
+
+ void printDeclarations(const TypeSymbolTable& ST);
+
+ unsigned int getBitWidth(const Type* Ty);
+
+ void printStaticConstant(const Constant* C, uint64_t& Offset);
+
+ void printStaticInitializer(const Constant* C, const std::string& Name);
+
+ void printVariableDefinition(const GlobalVariable* G);
+
+ void printGlobalVariables();
+
+ const char* getLibraryName(const Function* F);
+
+ const char* getLibraryName(const GlobalVariable* GV);
+
+ const char* getLibraryForSymbol(const char* Name, bool isFunction,
+ unsigned CallingConv);
+
+ void printExternals();
+ };
+}
+
+#endif
+
diff --git a/lib/Target/MSIL/Makefile b/lib/Target/MSIL/Makefile
new file mode 100644
index 000000000000..94265edf98c5
--- /dev/null
+++ b/lib/Target/MSIL/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Target/MSIL/Makefile ----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMMSIL
+include $(LEVEL)/Makefile.common
+
+CompileCommonOpts := $(CompileCommonOpts) -Wno-format
diff --git a/lib/Target/MSIL/README.TXT b/lib/Target/MSIL/README.TXT
new file mode 100644
index 000000000000..d797c71fd39f
--- /dev/null
+++ b/lib/Target/MSIL/README.TXT
@@ -0,0 +1,26 @@
+//===---------------------------------------------------------------------===//
+
+Add support for the vector instructions:
+
+ShuffleVector
+ExtractElement
+InsertElement
+
+//===---------------------------------------------------------------------===//
+
+Add support for the "OpaqueType" type.
+
+//===---------------------------------------------------------------------===//
+
+Emulate the "switch" instruction with the CLI "switch" instruction (see the
+sketch below).
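+
+A rough sketch of the emitted code for a dense, zero-based switch (label names
+are illustrative):
+
+  ldloc  'x'
+  switch (case0$label, case1$label, case2$label)
+  br     default$label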
+
+//===---------------------------------------------------------------------===//
+
+Write a linker for external functions, because resolving an external function
+requires knowing which dynamic library it is located in.
+
+.method static hidebysig pinvokeimpl("msvcrt.dll" cdecl)
+ void free(void*) preservesig {}
+
+
+
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
new file mode 100644
index 000000000000..67017733cd9c
--- /dev/null
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -0,0 +1,23 @@
+set(LLVM_TARGET_DEFINITIONS MSP430.td)
+
+tablegen(MSP430GenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(MSP430GenRegisterNames.inc -gen-register-enums)
+tablegen(MSP430GenRegisterInfo.inc -gen-register-desc)
+tablegen(MSP430GenInstrNames.inc -gen-instr-enums)
+tablegen(MSP430GenInstrInfo.inc -gen-instr-desc)
+tablegen(MSP430GenAsmWriter.inc -gen-asm-writer)
+tablegen(MSP430GenDAGISel.inc -gen-dag-isel)
+tablegen(MSP430GenCallingConv.inc -gen-callingconv)
+tablegen(MSP430GenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(MSP430
+ MSP430AsmPrinter.cpp
+ MSP430FrameInfo.cpp
+ MSP430InstrInfo.cpp
+ MSP430ISelDAGToDAG.cpp
+ MSP430ISelLowering.cpp
+ MSP430RegisterInfo.cpp
+ MSP430Subtarget.cpp
+ MSP430TargetAsmInfo.cpp
+ MSP430TargetMachine.cpp
+ )
diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h
new file mode 100644
index 000000000000..ed0cd0496aaa
--- /dev/null
+++ b/lib/Target/MSP430/MSP430.h
@@ -0,0 +1,40 @@
+//==-- MSP430.h - Top-level interface for MSP430 representation --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM MSP430 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_H
+#define LLVM_TARGET_MSP430_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MSP430TargetMachine;
+ class FunctionPass;
+ class raw_ostream;
+
+ FunctionPass *createMSP430ISelDag(MSP430TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+ FunctionPass *createMSP430CodePrinterPass(raw_ostream &o,
+ MSP430TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+} // end namespace llvm;
+
+// Defines symbolic names for MSP430 registers.
+// This defines a mapping from register name to register number.
+#include "MSP430GenRegisterNames.inc"
+
+// Defines symbolic names for the MSP430 instructions.
+#include "MSP430GenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td
new file mode 100644
index 000000000000..89313ab59c1b
--- /dev/null
+++ b/lib/Target/MSP430/MSP430.td
@@ -0,0 +1,60 @@
+//===- MSP430.td - Describe the MSP430 Target Machine ---------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the MSP430 target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+def FeatureX
+ : SubtargetFeature<"ext", "ExtendedInsts", "true",
+ "Enable MSP430-X extensions">;
+
+//===----------------------------------------------------------------------===//
+// MSP430 supported processors.
+//===----------------------------------------------------------------------===//
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "MSP430CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrInfo.td"
+
+def MSP430InstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def MSP430 : Target {
+ let InstructionSet = MSP430InstrInfo;
+}
+
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
new file mode 100644
index 000000000000..71b785bb4fe5
--- /dev/null
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -0,0 +1,267 @@
+//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the MSP430 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class VISIBILITY_HIDDEN MSP430AsmPrinter : public AsmPrinter {
+ public:
+ MSP430AsmPrinter(raw_ostream &O, MSP430TargetMachine &TM,
+ const TargetAsmInfo *TAI,
+ CodeGenOpt::Level OL, bool V)
+ : AsmPrinter(O, TM, TAI, OL, V) {}
+
+ virtual const char *getPassName() const {
+ return "MSP430 Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier = 0);
+ void printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier = 0);
+ void printCCOperand(const MachineInstr *MI, int OpNum);
+ bool printInstruction(const MachineInstr *MI); // autogenerated.
+ void printMachineInstruction(const MachineInstr * MI);
+
+ void emitFunctionHeader(const MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AsmPrinter::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ }
+ };
+} // end of anonymous namespace
+
+#include "MSP430GenAsmWriter.inc"
+
+/// createMSP430CodePrinterPass - Returns a pass that prints the MSP430
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createMSP430CodePrinterPass(raw_ostream &o,
+ MSP430TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new MSP430AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+bool MSP430AsmPrinter::doInitialization(Module &M) {
+ Mang = new Mangler(M, "", TAI->getPrivateGlobalPrefix());
+ return false; // success
+}
+
+
+bool MSP430AsmPrinter::doFinalization(Module &M) {
+ return AsmPrinter::doFinalization(M);
+}
+
+void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ unsigned FnAlign = 4;
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ FnAlign = 1;
+
+ EmitAlignment(FnAlign, F);
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ case Function::PrivateLinkage:
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ break;
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ O << "\t.weak\t" << CurrentFnName << '\n';
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ O << "\t.type\t" << CurrentFnName << ",@function\n"
+ << CurrentFnName << ":\n";
+}
+
+bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print the 'header' of function
+ emitFunctionHeader(MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (!VerboseAsm && (I->pred_empty() || I->isOnlyReachableByFallthrough())) {
+ // This is an entry block or a block that's only reachable via a
+ // fallthrough edge. In non-VerboseAsm mode, don't print the label.
+ } else {
+ printBasicBlockLabel(I, true, true, VerboseAsm);
+ O << '\n';
+ }
+
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II)
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
+
+ O.flush();
+
+ // We didn't modify anything
+ return false;
+}
+
+void MSP430AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Call the autogenerated instruction printer routines.
+ if (printInstruction(MI))
+ return;
+
+ assert(0 && "Should not happen");
+}
+
+void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Virtual registers should be already mapped!");
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ return;
+ case MachineOperand::MO_Immediate:
+ if (!Modifier || strcmp(Modifier, "nohash"))
+ O << '#';
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ std::string Name = Mang->getValueName(MO.getGlobal());
+ assert(MO.getOffset() == 0 && "No offsets allowed!");
+
+ if (isCallOp)
+ O << '#';
+ else if (isMemOp)
+ O << '&';
+
+ O << Name;
+
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ std::string Name(TAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ if (isCallOp)
+ O << '#';
+ O << Name;
+ return;
+ }
+ default:
+ assert(0 && "Not implemented yet!");
+ }
+}
+
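+// Memory operands are printed in one of three forms: '&glob' for a global
+// address, 'disp(Rn)' for a register with a non-zero displacement, and
+// '@Rn' for a plain register-indirect access.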
+void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
+ const char* Modifier) {
+ const MachineOperand &Base = MI->getOperand(OpNum);
+ const MachineOperand &Disp = MI->getOperand(OpNum+1);
+
+ if (Base.isGlobal())
+ printOperand(MI, OpNum, "mem");
+ else if (Disp.isImm() && !Base.getReg())
+ printOperand(MI, OpNum);
+ else if (Base.getReg()) {
+ if (Disp.getImm()) {
+ printOperand(MI, OpNum + 1, "nohash");
+ O << '(';
+ printOperand(MI, OpNum);
+ O << ')';
+ } else {
+ O << '@';
+ printOperand(MI, OpNum);
+ }
+ } else
+ assert(0 && "Unsupported memory operand");
+}
+
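+// The printed suffix combines with the "j$cc" asm string of the JCC pseudo
+// (see MSP430InstrInfo.td) to form the branch mnemonics jeq, jne, jhs, jlo,
+// jge and jl.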
+void MSP430AsmPrinter::printCCOperand(const MachineInstr *MI, int OpNum) {
+ unsigned CC = MI->getOperand(OpNum).getImm();
+
+ switch (CC) {
+ default:
+ assert(0 && "Unsupported CC code");
+ break;
+ case MSP430::COND_E:
+ O << "eq";
+ break;
+ case MSP430::COND_NE:
+ O << "ne";
+ break;
+ case MSP430::COND_HS:
+ O << "hs";
+ break;
+ case MSP430::COND_LO:
+ O << "lo";
+ break;
+ case MSP430::COND_GE:
+ O << "ge";
+ break;
+ case MSP430::COND_L:
+ O << 'l';
+ break;
+ }
+}
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
new file mode 100644
index 000000000000..ad27cc9122a8
--- /dev/null
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -0,0 +1,37 @@
+//==- MSP430CallingConv.td - Calling Conventions for MSP430 -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the MSP430 architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MSP430 Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_MSP430 : CallingConv<[
+  // i8 values are returned in registers R15B, R14B, R13B, R12B.
+ CCIfType<[i8], CCAssignToReg<[R15B, R14B, R13B, R12B]>>,
+
+  // i16 values are returned in registers R15W, R14W, R13W, R12W.
+ CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_MSP430 : CallingConv<[
+ // Promote i8 arguments to i16.
+ CCIfType<[i8], CCPromoteToType<i16>>,
+
+ // The first 4 integer arguments of non-varargs functions are passed in
+ // integer registers.
+ CCIfNotVarArg<CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>>,
+
+ // Integer values get stored in stack slots that are 2 bytes in
+ // size and 2-byte aligned.
+ CCIfType<[i16], CCAssignToStack<2, 2>>
+]>;
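+
+// For example, a call 'f(i8 a, i16 b, i16 c, i16 d, i16 e)' passes 'a'
+// promoted to i16 in R15W, 'b' in R14W, 'c' in R13W, 'd' in R12W, and 'e'
+// in a 2-byte, 2-byte-aligned stack slot.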
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..bf49ec0bff46
--- /dev/null
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -0,0 +1,194 @@
+//===-- MSP430ISelDAGToDAG.cpp - A dag to dag inst selector for MSP430 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MSP430 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430ISelLowering.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+/// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class MSP430DAGToDAGISel : public SelectionDAGISel {
+ MSP430TargetLowering &Lowering;
+ const MSP430Subtarget &Subtarget;
+
+ public:
+ MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "MSP430 DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+ #include "MSP430GenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDValue Op);
+ bool SelectAddr(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Disp);
+
+ #ifndef NDEBUG
+ unsigned Indent;
+ #endif
+ };
+} // end anonymous namespace
+
+/// createMSP430ISelDag - This pass converts a legalized DAG into a
+/// MSP430-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createMSP430ISelDag(MSP430TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new MSP430DAGToDAGISel(TM, OptLevel);
+}
+
+// FIXME: This is a pretty dumb routine and needs to be rewritten in the future.
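+// Addresses are matched as a (Base, Disp) pair: a frame index or
+// 'fi + imm16' becomes a TargetFrameIndex plus constant, a wrapped global or
+// external symbol becomes the target symbol with a zero displacement, and
+// anything else falls back to the address itself as base with a zero
+// displacement.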
+bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Disp) {
+ // Try to match frame address first.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16);
+ Disp = CurDAG->getTargetConstant(0, MVT::i16);
+ return true;
+ }
+
+ switch (Addr.getOpcode()) {
+ case ISD::ADD:
+ // Operand is a result from ADD with constant operand which fits into i16.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ uint64_t CVal = CN->getZExtValue();
+ // Offset should fit into 16 bits.
+ if (((CVal << 48) >> 48) == CVal) {
+ SDValue N0 = Addr.getOperand(0);
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N0))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16);
+ else
+ Base = N0;
+
+ Disp = CurDAG->getTargetConstant(CVal, MVT::i16);
+ return true;
+ }
+ }
+ break;
+  case MSP430ISD::Wrapper: {
+    SDValue N0 = Addr.getOperand(0);
+    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+      Base = CurDAG->getTargetGlobalAddress(G->getGlobal(),
+                                            MVT::i16, G->getOffset());
+      Disp = CurDAG->getTargetConstant(0, MVT::i16);
+      return true;
+    } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(N0)) {
+      Base = CurDAG->getTargetExternalSymbol(E->getSymbol(), MVT::i16);
+      Disp = CurDAG->getTargetConstant(0, MVT::i16);
+      return true;
+    }
+    break;
+  }
+  }
+
+ Base = Addr;
+ Disp = CurDAG->getTargetConstant(0, MVT::i16);
+
+ return true;
+}
+
+
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void MSP430DAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+
+ // Codegen the basic block.
+#ifndef NDEBUG
+ DOUT << "===== Instruction selection begins:\n";
+ Indent = 0;
+#endif
+ SelectRoot(*CurDAG);
+#ifndef NDEBUG
+ DOUT << "===== Instruction selection ends:\n";
+#endif
+
+ CurDAG->RemoveDeadNodes();
+}
+
+SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Dump information about the Node being selected
+ #ifndef NDEBUG
+ DOUT << std::string(Indent, ' ') << "Selecting: ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent += 2;
+ #endif
+
+  // If this is already a machine node, it has been selected; nothing to do.
+ if (Node->isMachineOpcode()) {
+ #ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "== ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+ #endif
+ return NULL;
+ }
+
+  // Handle a few cases that need custom selection.
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::FrameIndex: {
+ assert(Op.getValueType() == MVT::i16);
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16);
+ if (Node->hasOneUse())
+ return CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16,
+ TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ return CurDAG->getTargetNode(MSP430::ADD16ri, dl, MVT::i16,
+ TFI, CurDAG->getTargetConstant(0, MVT::i16));
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Op);
+
+ #ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ if (ResNode == NULL || ResNode == Op.getNode())
+ DEBUG(Op.getNode()->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+ #endif
+
+ return ResNode;
+}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
new file mode 100644
index 000000000000..14db20e5fcd6
--- /dev/null
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -0,0 +1,670 @@
+//===-- MSP430ISelLowering.cpp - MSP430 DAG Lowering Implementation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-lower"
+
+#include "MSP430ISelLowering.h"
+#include "MSP430.h"
+#include "MSP430TargetMachine.h"
+#include "MSP430Subtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
+ TargetLowering(tm), Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, MSP430::GR8RegisterClass);
+ addRegisterClass(MVT::i16, MSP430::GR16RegisterClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Provide all sorts of operation actions
+
+ // Division is expensive
+ setIntDivIsCheap(false);
+
+  // Even though the hardware only shifts one bit at a time, shifts of the
+  // whole bitwidth can be performed one bit per step, so an i8 shift amount
+  // type is sufficient.
+ setShiftAmountType(MVT::i8);
+
+ setStackPointerRegisterToSaveRestore(MSP430::SPW);
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setSchedulingPreference(SchedulingForLatency);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // We don't have any truncstores
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i16, Custom);
+ setOperationAction(ISD::SHL, MVT::i16, Custom);
+ setOperationAction(ISD::SRL, MVT::i16, Custom);
+ setOperationAction(ISD::ROTL, MVT::i8, Expand);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand);
+ setOperationAction(ISD::ROTL, MVT::i16, Expand);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand);
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i8, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i16, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::SETCC, MVT::i8, Expand);
+ setOperationAction(ISD::SETCC, MVT::i16, Expand);
+ setOperationAction(ISD::SELECT, MVT::i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::i16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Custom);
+
+  // FIXME: Implement multiplication by a constant efficiently.
+ setOperationAction(ISD::MUL, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+}
+
+SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::SHL: // FALLTHROUGH
+ case ISD::SRL:
+ case ISD::SRA: return LowerShifts(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ default:
+ assert(0 && "unimplemented operand");
+ return SDValue();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MSP430GenCallingConv.inc"
+
+SDValue MSP430TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
+ SelectionDAG &DAG) {
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch (CC) {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Op, DAG);
+ }
+}
+
+SDValue MSP430TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ unsigned CallingConv = TheCall->getCallingConv();
+ switch (CallingConv) {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Op, DAG, CallingConv);
+ }
+}
+
+/// LowerCCCArguments - transform physical registers into virtual registers and
+/// generate load operations for arguments placed on the stack.
+// FIXME: struct return stuff
+// FIXME: varargs
+SDValue MSP430TargetLowering::LowerCCCArguments(SDValue Op,
+ SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SDValue Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ unsigned CC = MF.getFunction()->getCallingConv();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_MSP430);
+
+ assert(!isVarArg && "Varargs not supported yet");
+
+ SmallVector<SDValue, 16> ArgValues;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ MVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT()) {
+ default:
+ cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+ << RegVT.getSimpleVT()
+ << "\n";
+ abort();
+ case MVT::i16:
+ unsigned VReg =
+ RegInfo.createVirtualRegister(MSP430::GR16RegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, VReg, RegVT);
+
+ // If this is an 8-bit value, it is really passed promoted to 16
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ ArgValues.push_back(ArgValue);
+ }
+ } else {
+ // Sanity check
+ assert(VA.isMemLoc());
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > 2) {
+ cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+ << VA.getLocVT().getSimpleVT()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset());
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
+ ArgValues.push_back(DAG.getLoad(VA.getLocVT(), dl, Root, FIN,
+ PseudoSourceValue::getFixedStack(FI), 0));
+ }
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+SDValue MSP430TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
+ // CCValAssign - represent the assignment of the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+
+  // Analyze the return values of ISD::RET.
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_MSP430);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ // The chain is always operand #0
+ SDValue Chain = Op.getOperand(0);
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+    // ISD::RET => ret chain, (val1, sign1), (val2, sign2), ...
+    // so operand i*2+1 is the i'th return value.
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Op.getOperand(i*2+1), Flag);
+
+    // Chain the copies together through the flag so they all stay adjacent
+    // and cannot be scheduled apart.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+
+ // Return Void
+ return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+/// LowerCCCCallTo - function arguments are copied from virtual registers to
+/// physical registers or the stack frame, and CALLSEQ_START / CALLSEQ_END
+/// nodes are emitted.
+/// TODO: sret.
+SDValue MSP430TargetLowering::LowerCCCCallTo(SDValue Op, SelectionDAG &DAG,
+ unsigned CC) {
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ bool isVarArg = TheCall->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+ CCInfo.AnalyzeCallOperands(TheCall, CC_MSP430);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+    // Arguments start after the first 5 operands of ISD::CALL.
+ SDValue Arg = TheCall->getArg(i);
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at RegsToPass
+ // vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy());
+
+ SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ StackPtr,
+ DAG.getIntPtrConstant(VA.getLocMemOffset()));
+
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ PseudoSourceValue::getStack(),
+ VA.getLocMemOffset()));
+ }
+ }
+
+  // Combine all of the store nodes into a single TokenFactor, since the
+  // stores are independent of one another.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain and
+  // flag operands which copy the outgoing args into registers. The InFlag is
+  // necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i16);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
+ Op.getResNo());
+}
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. Returns a SDNode with the same number of values as the
+/// ISD::CALL.
+SDNode*
+MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallSDNode *TheCall,
+ unsigned CallingConv,
+ SelectionDAG &DAG) {
+ bool isVarArg = TheCall->isVarArg();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_MSP430);
+ SmallVector<SDValue, 8> ResultVals;
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ ResultVals.push_back(Chain.getValue(0));
+ }
+
+ ResultVals.push_back(Chain);
+
+ // Merge everything together with a MERGE_VALUES node.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size()).getNode();
+}
+
+SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
+ SelectionDAG &DAG) {
+ unsigned Opc = Op.getOpcode();
+ SDNode* N = Op.getNode();
+ MVT VT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+  // We currently only lower shifts by a constant amount.
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+
+  // Expand the shift into a sequence of single-bit shift operations.
+ // FIXME: for some shift amounts this might be done better!
+ // E.g.: foo >> (8 + N) => sxt(swpb(foo)) >> N
+ SDValue Victim = N->getOperand(0);
+
+ if (Opc == ISD::SRL && ShiftAmount) {
+    // The first step of a logical right shift is emitted as RRC with the
+    // carry cleared (srl A, 1 => clrc; rrc A). This zeroes the most
+    // significant bit, so the remaining steps can safely use the arithmetic
+    // RRA below.
+ Victim = DAG.getNode(MSP430ISD::RRC, dl, VT, Victim);
+ ShiftAmount -= 1;
+ }
+
+ while (ShiftAmount--)
+ Victim = DAG.getNode((Opc == ISD::SHL ? MSP430ISD::RLA : MSP430ISD::RRA),
+ dl, VT, Victim);
+
+ return Victim;
+}
+
+SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+
+ // Create the TargetGlobalAddress node, folding in the constant offset.
+ SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ return DAG.getNode(MSP430ISD::Wrapper, Op.getDebugLoc(),
+ getPointerTy(), Result);
+}
+
+SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+
+  return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
+}
+
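+// EmitCMP maps an ISD condition code onto one of the six MSP430 condition
+// codes, swapping the compare operands where only the reversed test exists:
+// e.g. (setugt a, b) is lowered as a comparison of (b, a) with COND_LO,
+// since a >u b holds exactly when b <u a.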
+static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, unsigned &TargetCC,
+ ISD::CondCode CC,
+ DebugLoc dl, SelectionDAG &DAG) {
+ // FIXME: Handle bittests someday
+ assert(!LHS.getValueType().isFloatingPoint() && "We don't handle FP yet");
+
+ // FIXME: Handle jump negative someday
+ TargetCC = MSP430::COND_INVALID;
+ switch (CC) {
+ default: assert(0 && "Invalid integer condition!");
+ case ISD::SETEQ:
+ TargetCC = MSP430::COND_E; // aka COND_Z
+ break;
+ case ISD::SETNE:
+ TargetCC = MSP430::COND_NE; // aka COND_NZ
+ break;
+ case ISD::SETULE:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETUGE:
+ TargetCC = MSP430::COND_HS; // aka COND_C
+ break;
+ case ISD::SETUGT:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETULT:
+ TargetCC = MSP430::COND_LO; // aka COND_NC
+ break;
+ case ISD::SETLE:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETGE:
+ TargetCC = MSP430::COND_GE;
+ break;
+ case ISD::SETGT:
+ std::swap(LHS, RHS); // FALLTHROUGH
+ case ISD::SETLT:
+ TargetCC = MSP430::COND_L;
+ break;
+ }
+
+ return DAG.getNode(MSP430ISD::CMP, dl, MVT::Flag, LHS, RHS);
+}
+
+
+SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned TargetCC = MSP430::COND_INVALID;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ return DAG.getNode(MSP430ISD::BR_CC, dl, Op.getValueType(),
+ Chain,
+ Dest, DAG.getConstant(TargetCC, MVT::i8),
+ Flag);
+}
+
+SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueV = Op.getOperand(2);
+ SDValue FalseV = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned TargetCC = MSP430::COND_INVALID;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(TrueV);
+ Ops.push_back(FalseV);
+ Ops.push_back(DAG.getConstant(TargetCC, MVT::i8));
+ Ops.push_back(Flag);
+
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+}
+
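+// Sign extension is lowered as an any-extend followed by SIGN_EXTEND_INREG,
+// e.g. (sext i8:$x to i16) becomes (sext_inreg (anyext $x), i8).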
+SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
+ SelectionDAG &DAG) {
+ SDValue Val = Op.getOperand(0);
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ assert(VT == MVT::i16 && "Only support i16 for now!");
+
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Val),
+ DAG.getValueType(Val.getValueType()));
+}
+
+const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG";
+ case MSP430ISD::RRA: return "MSP430ISD::RRA";
+ case MSP430ISD::RLA: return "MSP430ISD::RLA";
+ case MSP430ISD::RRC: return "MSP430ISD::RRC";
+ case MSP430ISD::CALL: return "MSP430ISD::CALL";
+ case MSP430ISD::Wrapper: return "MSP430ISD::Wrapper";
+ case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC";
+ case MSP430ISD::CMP: return "MSP430ISD::CMP";
+ case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock*
+MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ assert((MI->getOpcode() == MSP430::Select16 ||
+ MI->getOpcode() == MSP430::Select8) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // jCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(copy1MBB)
+ .addImm(MI->getOperand(3).getImm());
+ F->insert(I, copy0MBB);
+ F->insert(I, copy1MBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ copy1MBB->transferSuccessors(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(copy1MBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to copy1MBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(copy1MBB);
+
+ // copy1MBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = copy1MBB;
+ BuildMI(BB, dl, TII.get(MSP430::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
new file mode 100644
index 000000000000..404534dde89e
--- /dev/null
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -0,0 +1,103 @@
+//==-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MSP430 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_ISELLOWERING_H
+#define LLVM_TARGET_MSP430_ISELLOWERING_H
+
+#include "MSP430.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace MSP430ISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// Return with a flag operand. Operand 0 is the chain operand.
+ RET_FLAG,
+
+ /// Y = R{R,L}A X, rotate right (left) arithmetically
+ RRA, RLA,
+
+ /// Y = RRC X, rotate right via carry
+ RRC,
+
+      /// CALL - This operation represents an abstract call instruction; its
+      /// operands are the chain, the callee, the argument registers that are
+      /// live into the call, and an optional incoming flag.
+ CALL,
+
+ /// Wrapper - A wrapper node for TargetConstantPool, TargetExternalSymbol,
+ /// and TargetGlobalAddress.
+ Wrapper,
+
+ /// CMP - Compare instruction.
+ CMP,
+
+ /// SetCC. Operand 0 is condition code, and operand 1 is the flag
+ /// operand produced by a CMP instruction.
+ SETCC,
+
+ /// MSP430 conditional branches. Operand 0 is the chain operand, operand 1
+ /// is the block to branch if condition is true, operand 2 is the
+ /// condition code, and operand 3 is the flag operand produced by a CMP
+ /// instruction.
+ BR_CC,
+
+      /// SELECT_CC. Operand 0 and operand 1 are the values to select between,
+      /// operand 2 is the condition code, and operand 3 is the flag operand
+      /// produced by a CMP instruction.
+ SELECT_CC
+ };
+ }
+
+ class MSP430Subtarget;
+ class MSP430TargetMachine;
+
+ class MSP430TargetLowering : public TargetLowering {
+ public:
+ explicit MSP430TargetLowering(MSP430TargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCCCArguments(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerShifts(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG);
+
+ SDValue LowerCCCCallTo(SDValue Op, SelectionDAG &DAG,
+ unsigned CC);
+ SDNode* LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG);
+
+ MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ private:
+ const MSP430Subtarget &Subtarget;
+ const MSP430TargetMachine &TM;
+ };
+} // namespace llvm
+
+#endif // LLVM_TARGET_MSP430_ISELLOWERING_H
diff --git a/lib/Target/MSP430/MSP430InstrFormats.td b/lib/Target/MSP430/MSP430InstrFormats.td
new file mode 100644
index 000000000000..61b339901648
--- /dev/null
+++ b/lib/Target/MSP430/MSP430InstrFormats.td
@@ -0,0 +1,67 @@
+//===- MSP430InstrFormats.td - MSP430 Instruction Formats-----*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe the MSP430 instruction formats here.
+//
+
+// Generic MSP430 Format
+class MSP430Inst<dag outs, dag ins, string asmstr> : Instruction {
+ field bits<16> Inst;
+
+ let Namespace = "MSP430";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ let AsmString = asmstr;
+}
+
+// FIXME: Create different classes for different addressing modes.
+
+// MSP430 Double Operand (Format I) Instructions
+class IForm<bits<4> opcode, bit ad, bit bw, bits<2> as,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, asmstr> {
+ let Pattern = pattern;
+
+ let Inst{12-15} = opcode;
+ let Inst{7} = ad;
+ let Inst{6} = bw;
+ let Inst{4-5} = as;
+}
+
+// MSP430 Single Operand (Format II) Instructions
+class IIForm<bits<9> opcode, bit bw, bits<2> ad,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, asmstr> {
+ let Pattern = pattern;
+
+ let Inst{7-15} = opcode;
+ let Inst{6} = bw;
+ let Inst{4-5} = ad;
+}
+
+// MSP430 Conditional Jumps Instructions
+class CJForm<bits<3> opcode, bits<3> cond, bit s,
+ dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, asmstr> {
+ let Pattern = pattern;
+
+ let Inst{13-15} = opcode;
+ let Inst{10-12} = cond;
+ let Inst{9} = s;
+}
+
+// Pseudo instructions
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : MSP430Inst<outs, ins, asmstr> {
+ let Pattern = pattern;
+ let Inst{15-0} = 0;
+}
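+
+// Once real encodings are filled in, a Format I (double operand) instruction
+// could be declared along these lines (the opcode value below is only
+// illustrative):
+//   def ADD16rr : IForm<0x5, 0, 0, 0,
+//                       (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+//                       "add.w\t{$src2, $dst}",
+//                       [(set GR16:$dst, (add GR16:$src1, GR16:$src2))]>;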
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
new file mode 100644
index 000000000000..91112c3d732f
--- /dev/null
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -0,0 +1,177 @@
+//===- MSP430InstrInfo.cpp - MSP430 Instruction Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430TargetMachine.h"
+#include "MSP430GenInstrInfo.inc"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+using namespace llvm;
+
+MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
+ : TargetInstrInfoImpl(MSP430Insts, array_lengthof(MSP430Insts)),
+ RI(tm, *this), TM(tm) {}
+
+void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == &MSP430::GR16RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV16mr))
+ .addFrameIndex(FrameIdx).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == &MSP430::GR8RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV8mr))
+ .addFrameIndex(FrameIdx).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else
+ assert(0 && "Cannot store this register to stack slot!");
+}
+
+void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (RC == &MSP430::GR16RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV16rm))
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0);
+ else if (RC == &MSP430::GR8RegClass)
+ BuildMI(MBB, MI, DL, get(MSP430::MOV8rm))
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0);
+ else
+ assert(0 && "Cannot store this register to stack slot!");
+}
+
+bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (DestRC == SrcRC) {
+ unsigned Opc;
+ if (DestRC == &MSP430::GR16RegClass) {
+ Opc = MSP430::MOV16rr;
+ } else if (DestRC == &MSP430::GR8RegClass) {
+ Opc = MSP430::MOV8rr;
+ } else {
+ return false;
+ }
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg);
+ return true;
+ }
+
+ return false;
+}
+
+bool
+MSP430InstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers yet.
+
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case MSP430::MOV8rr:
+ case MSP430::MOV16rr:
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "invalid register-register move instruction");
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ }
+}
+
+bool
+MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
+ MFI->setCalleeSavedFrameSize(CSI.size() * 2);
+
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ BuildMI(MBB, MI, DL, get(MSP430::PUSH16r))
+ .addReg(Reg, RegState::Kill);
+ }
+ return true;
+}
+
+bool
+MSP430InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ BuildMI(MBB, MI, DL, get(MSP430::POP16r), CSI[i].getReg());
+
+ return true;
+}
+
+unsigned
+MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME this should probably have a DebugLoc operand
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "MSP430 branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, dl, get(MSP430::JMP)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ assert(0 && "Implement conditional branches!");
+
+ return Count;
+}
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
new file mode 100644
index 000000000000..e07aacad9dc2
--- /dev/null
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -0,0 +1,84 @@
+//===- MSP430InstrInfo.h - MSP430 Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430INSTRINFO_H
+#define LLVM_TARGET_MSP430INSTRINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MSP430RegisterInfo.h"
+
+namespace llvm {
+
+class MSP430TargetMachine;
+
+namespace MSP430 {
+ // MSP430 specific condition code.
+ enum CondCode {
+ COND_E = 0, // aka COND_Z
+ COND_NE = 1, // aka COND_NZ
+ COND_HS = 2, // aka COND_C
+ COND_LO = 3, // aka COND_NC
+ COND_GE = 4,
+ COND_L = 5,
+
+ COND_INVALID
+ };
+}
+
+class MSP430InstrInfo : public TargetInstrInfoImpl {
+ const MSP430RegisterInfo RI;
+ MSP430TargetMachine &TM;
+public:
+ explicit MSP430InstrInfo(MSP430TargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ bool isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const;
+
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
new file mode 100644
index 000000000000..39c08e40be46
--- /dev/null
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -0,0 +1,901 @@
+//===- MSP430InstrInfo.td - MSP430 Instruction defs -----------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the MSP430 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "MSP430InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+
+//===----------------------------------------------------------------------===//
+// Type Profiles.
+//===----------------------------------------------------------------------===//
+def SDT_MSP430Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>;
+def SDT_MSP430CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
+def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+def SDT_MSP430Cmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, i8>]>;
+def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
+def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
+def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
+
+def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
+ [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+def MSP430callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def MSP430callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_MSP430CallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
+def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>;
+def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, [SDNPHasChain, SDNPInFlag]>;
+def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, [SDNPInFlag]>;
+
+//===----------------------------------------------------------------------===//
+// MSP430 Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+// Address operands
+def memsrc : Operand<i16> {
+ let PrintMethod = "printSrcMemOperand";
+ let MIOperandInfo = (ops GR16, i16imm);
+}
+
+def memdst : Operand<i16> {
+ let PrintMethod = "printSrcMemOperand";
+ let MIOperandInfo = (ops GR16, i16imm);
+}
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+
+// Operand for printing out a condition code.
+def cc : Operand<i8> {
+ let PrintMethod = "printCCOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// MSP430 Complex Pattern Definitions.
+//===----------------------------------------------------------------------===//
+
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>;
+
+//===----------------------------------------------------------------------===//
+// Pattern Fragments
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>;
+
+//===----------------------------------------------------------------------===//
+// Instruction list..
+
+// ADJCALLSTACKDOWN/UP implicitly use/def SP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber SRW.
+let Defs = [SPW, SRW], Uses = [SPW] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(MSP430callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(MSP430callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let usesCustomDAGSchedInserter = 1 in {
+ def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cc),
+ "# Select8 PSEUDO",
+ [(set GR8:$dst,
+ (MSP430selectcc GR8:$src1, GR8:$src2, imm:$cc))]>;
+ def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cc),
+ "# Select16 PSEUDO",
+ [(set GR16:$dst,
+ (MSP430selectcc GR16:$src1, GR16:$src2, imm:$cc))]>;
+}
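+// Select8/Select16 are expanded by
+// MSP430TargetLowering::EmitInstrWithCustomInserter into a conditional-branch
+// diamond ending in a PHI in the join block.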
+
+let neverHasSideEffects = 1 in
+def NOP : Pseudo<(outs), (ins), "nop", []>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// FIXME: Provide proper encoding!
+let isReturn = 1, isTerminator = 1 in {
+ def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+
+// Direct branch
+let isBarrier = 1 in
+ def JMP : Pseudo<(outs), (ins brtarget:$dst),
+ "jmp\t$dst",
+ [(br bb:$dst)]>;
+
+// Conditional branches
+let Uses = [SRW] in
+ def JCC : Pseudo<(outs), (ins brtarget:$dst, cc:$cc),
+ "j$cc $dst",
+ [(MSP430brcc bb:$dst, imm:$cc)]>;
+} // isBranch, isTerminator
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. SPW is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [R12W, R13W, R14W, R15W, SRW],
+ Uses = [SPW] in {
+ def CALLi : Pseudo<(outs), (ins i16imm:$dst, variable_ops),
+ "call\t${dst:call}", [(MSP430call imm:$dst)]>;
+ def CALLr : Pseudo<(outs), (ins GR16:$dst, variable_ops),
+ "call\t$dst", [(MSP430call GR16:$dst)]>;
+ def CALLm : Pseudo<(outs), (ins memsrc:$dst, variable_ops),
+ "call\t${dst:mem}", [(MSP430call (load addr:$dst))]>;
+ }
+
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions...
+//
+let Defs = [SPW], Uses = [SPW], neverHasSideEffects=1 in {
+let mayLoad = 1 in
+def POP16r : Pseudo<(outs GR16:$reg), (ins), "pop.w\t$reg", []>;
+
+let mayStore = 1 in
+def PUSH16r : Pseudo<(outs), (ins GR16:$reg), "push.w\t$reg",[]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move Instructions
+
+// FIXME: Provide proper encoding!
+let neverHasSideEffects = 1 in {
+def MOV8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src),
+ "mov.b\t{$src, $dst}",
+ []>;
+def MOV16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "mov.w\t{$src, $dst}",
+ []>;
+}
+
+// FIXME: Provide proper encoding!
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8ri : Pseudo<(outs GR8:$dst), (ins i8imm:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR8:$dst, imm:$src)]>;
+def MOV16ri : Pseudo<(outs GR16:$dst), (ins i16imm:$src),
+ "mov.w\t{$src, $dst}",
+ [(set GR16:$dst, imm:$src)]>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+def MOV8rm : Pseudo<(outs GR8:$dst), (ins memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR8:$dst, (load addr:$src))]>;
+def MOV16rm : Pseudo<(outs GR16:$dst), (ins memsrc:$src),
+ "mov.w\t{$src, $dst}",
+ [(set GR16:$dst, (load addr:$src))]>;
+}
+
+def MOVZX16rr8 : Pseudo<(outs GR16:$dst), (ins GR8:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zext GR8:$src))]>;
+def MOVZX16rm8 : Pseudo<(outs GR16:$dst), (ins memsrc:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zextloadi16i8 addr:$src))]>;
+
+// Any instruction that defines an 8-bit result leaves the high half of the
+// register zeroed. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg
+// may be copying from a truncate, but any other 8-bit operation will
+// zero-extend up to 16 bits.
+def def8 : PatLeaf<(i8 GR8:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg;
+}]>;
+
+// In the case of an 8-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i16 (zext def8:$src)),
+ (SUBREG_TO_REG (i16 0), GR8:$src, subreg_8bit)>;
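+// For example, with this pattern a zext of an ADD8rr result can be selected as
+// a bare SUBREG_TO_REG instead of going through an explicit MOVZX16rr8 copy.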
+
+
+def MOV8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "mov.b\t{$src, $dst}",
+ [(store (i8 imm:$src), addr:$dst)]>;
+def MOV16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "mov.w\t{$src, $dst}",
+ [(store (i16 imm:$src), addr:$dst)]>;
+
+def MOV8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "mov.b\t{$src, $dst}",
+ [(store GR8:$src, addr:$dst)]>;
+def MOV16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "mov.w\t{$src, $dst}",
+ [(store GR16:$src, addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+
+let isTwoAddress = 1 in {
+
+let Defs = [SRW] in {
+
+let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
+// FIXME: Provide proper encoding!
+def ADD8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def ADD16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def ADD8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def ADD16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+def ADD8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "add.b\t{$src2, $dst}",
+ [(set GR8:$dst, (add GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def ADD16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "add.w\t{$src2, $dst}",
+ [(set GR16:$dst, (add GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+let isTwoAddress = 0 in {
+def ADD8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def ADD8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def ADD8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "add.b\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def ADD16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "add.w\t{$src, $dst}",
+ [(store (add (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let Uses = [SRW] in {
+
+let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
+def ADC8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def ADC16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+} // isCommutable
+
+def ADC8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def ADC16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+def ADC8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "addc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def ADC16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "addc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let isTwoAddress = 0 in {
+def ADC8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def ADC8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def ADC8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "addc.b\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def ADC16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "addc.w\t{$src, $dst}",
+ [(store (adde (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // Uses = [SRW]
+
+let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
+def AND8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def AND16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def AND8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def AND16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+def AND8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "and.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def AND16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "and.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let isTwoAddress = 0 in {
+def AND8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def AND16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def AND8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def AND16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def AND8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "and.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def AND16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "and.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+
+let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
+def XOR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def XOR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+}
+
+def XOR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def XOR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+def XOR8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "xor.b\t{$src2, $dst}",
+ [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def XOR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "xor.w\t{$src2, $dst}",
+ [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let isTwoAddress = 0 in {
+def XOR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def XOR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def XOR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "xor.b\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def XOR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "xor.w\t{$src, $dst}",
+ [(store (xor (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+
+def SUB8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def SUB16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+
+def SUB8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def SUB16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+def SUB8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "sub.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def SUB16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "sub.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let isTwoAddress = 0 in {
+def SUB8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def SUB8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def SUB8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "sub.b\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def SUB16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "sub.w\t{$src, $dst}",
+ [(store (sub (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+let Uses = [SRW] in {
+def SBC8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def SBC16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+
+def SBC8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def SBC16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+def SBC8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "subc.b\t{$src2, $dst}",
+ [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def SBC16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "subc.w\t{$src2, $dst}",
+ [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+let isTwoAddress = 0 in {
+def SBC8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def SBC8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def SBC8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "subc.b\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def SBC16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "subc.w\t{$src, $dst}",
+ [(store (sube (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // Uses = [SRW]
+
+// FIXME: Provide proper encoding!
+def SAR8r1 : Pseudo<(outs GR8:$dst), (ins GR8:$src),
+ "rra.b\t$dst",
+ [(set GR8:$dst, (MSP430rra GR8:$src)),
+ (implicit SRW)]>;
+def SAR16r1 : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "rra.w\t$dst",
+ [(set GR16:$dst, (MSP430rra GR16:$src)),
+ (implicit SRW)]>;
+
+def SHL8r1 : Pseudo<(outs GR8:$dst), (ins GR8:$src),
+ "rla.b\t$dst",
+ [(set GR8:$dst, (MSP430rla GR8:$src)),
+ (implicit SRW)]>;
+def SHL16r1 : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "rla.w\t$dst",
+ [(set GR16:$dst, (MSP430rla GR16:$src)),
+ (implicit SRW)]>;
+
+def SAR8r1c : Pseudo<(outs GR8:$dst), (ins GR8:$src),
+ "clrc\n\t"
+ "rrc.b\t$dst",
+ [(set GR8:$dst, (MSP430rrc GR8:$src)),
+ (implicit SRW)]>;
+def SAR16r1c : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "clrc\n\t"
+ "rrc.w\t$dst",
+ [(set GR16:$dst, (MSP430rrc GR16:$src)),
+ (implicit SRW)]>;
+
+def SEXT16r : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "sxt\t$dst",
+ [(set GR16:$dst, (sext_inreg GR16:$src, i8)),
+ (implicit SRW)]>;
+
+} // Defs = [SRW]
+
+def SWPB16r : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "swpb\t$dst",
+ [(set GR16:$dst, (bswap GR16:$src))]>;
+
+let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
+def OR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>;
+def OR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>;
+}
+
+def OR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>;
+def OR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>;
+
+def OR8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>;
+def OR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>;
+
+let isTwoAddress = 0 in {
+def OR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit SRW)]>;
+def OR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit SRW)]>;
+
+def OR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+def OR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst),
+ (implicit SRW)]>;
+
+def OR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+def OR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (implicit SRW)]>;
+}
+
+} // isTwoAddress = 1
+
+// Integer comparisons
+let Defs = [SRW] in {
+def CMP8rr : Pseudo<(outs), (ins GR8:$src1, GR8:$src2),
+ "cmp.b\t{$src1, $src2}",
+ [(MSP430cmp GR8:$src1, GR8:$src2), (implicit SRW)]>;
+def CMP16rr : Pseudo<(outs), (ins GR16:$src1, GR16:$src2),
+ "cmp.w\t{$src1, $src2}",
+ [(MSP430cmp GR16:$src1, GR16:$src2), (implicit SRW)]>;
+
+def CMP8ir : Pseudo<(outs), (ins i8imm:$src1, GR8:$src2),
+ "cmp.b\t{$src1, $src2}",
+ [(MSP430cmp imm:$src1, GR8:$src2), (implicit SRW)]>;
+def CMP16ir : Pseudo<(outs), (ins i16imm:$src1, GR16:$src2),
+ "cmp.w\t{$src1, $src2}",
+ [(MSP430cmp imm:$src1, GR16:$src2), (implicit SRW)]>;
+
+def CMP8im : Pseudo<(outs), (ins i8imm:$src1, memsrc:$src2),
+ "cmp.b\t{$src1, $src2}",
+ [(MSP430cmp (i8 imm:$src1), (load addr:$src2)), (implicit SRW)]>;
+def CMP16im : Pseudo<(outs), (ins i16imm:$src1, memsrc:$src2),
+ "cmp.w\t{$src1, $src2}",
+ [(MSP430cmp (i16 imm:$src1), (load addr:$src2)), (implicit SRW)]>;
+
+// FIXME: imm is allowed only on src operand, not on dst.
+
+//def CMP8ri : Pseudo<(outs), (ins GR8:$src1, i8imm:$src2),
+// "cmp.b\t{$src1, $src2}",
+// [(MSP430cmp GR8:$src1, imm:$src2), (implicit SRW)]>;
+//def CMP16ri : Pseudo<(outs), (ins GR16:$src1, i16imm:$src2),
+// "cmp.w\t{$src1, $src2}",
+// [(MSP430cmp GR16:$src1, imm:$src2), (implicit SRW)]>;
+
+//def CMP8mi : Pseudo<(outs), (ins memsrc:$src1, i8imm:$src2),
+// "cmp.b\t{$src1, $src2}",
+// [(MSP430cmp (load addr:$src1), (i8 imm:$src2)), (implicit SRW)]>;
+//def CMP16mi : Pseudo<(outs), (ins memsrc:$src1, i16imm:$src2),
+// "cmp.w\t{$src1, $src2}",
+// [(MSP430cmp (load addr:$src1), (i16 imm:$src2)), (implicit SRW)]>;
+
+
+// Imm 0, +1, +2, +4, +8 are encoded via the constant generator registers.
+// That's why we can use them as dest operands.
+// We don't define a new class for them, since they would need special encoding
+// in the future.
+
+def CMP8ri0 : Pseudo<(outs), (ins GR8:$src1),
+ "cmp.b\t{$src1, #0}",
+ [(MSP430cmp GR8:$src1, 0), (implicit SRW)]>;
+def CMP16ri0: Pseudo<(outs), (ins GR16:$src1),
+ "cmp.w\t{$src1, #0}",
+ [(MSP430cmp GR16:$src1, 0), (implicit SRW)]>;
+def CMP8ri1 : Pseudo<(outs), (ins GR8:$src1),
+ "cmp.b\t{$src1, #1}",
+ [(MSP430cmp GR8:$src1, 1), (implicit SRW)]>;
+def CMP16ri1: Pseudo<(outs), (ins GR16:$src1),
+ "cmp.w\t{$src1, #1}",
+ [(MSP430cmp GR16:$src1, 1), (implicit SRW)]>;
+def CMP8ri2 : Pseudo<(outs), (ins GR8:$src1),
+ "cmp.b\t{$src1, #2}",
+ [(MSP430cmp GR8:$src1, 2), (implicit SRW)]>;
+def CMP16ri2: Pseudo<(outs), (ins GR16:$src1),
+ "cmp.w\t{$src1, #2}",
+ [(MSP430cmp GR16:$src1, 2), (implicit SRW)]>;
+def CMP8ri4 : Pseudo<(outs), (ins GR8:$src1),
+ "cmp.b\t{$src1, #4}",
+ [(MSP430cmp GR8:$src1, 4), (implicit SRW)]>;
+def CMP16ri4: Pseudo<(outs), (ins GR16:$src1),
+ "cmp.w\t{$src1, #4}",
+ [(MSP430cmp GR16:$src1, 4), (implicit SRW)]>;
+def CMP8ri8 : Pseudo<(outs), (ins GR8:$src1),
+ "cmp.b\t{$src1, #8}",
+ [(MSP430cmp GR8:$src1, 8), (implicit SRW)]>;
+def CMP16ri8: Pseudo<(outs), (ins GR16:$src1),
+ "cmp.w\t{$src1, #8}",
+ [(MSP430cmp GR16:$src1, 8), (implicit SRW)]>;
+
+def CMP8rm : Pseudo<(outs), (ins GR8:$src1, memsrc:$src2),
+ "cmp.b\t{$src1, $src2}",
+ [(MSP430cmp GR8:$src1, (load addr:$src2)), (implicit SRW)]>;
+def CMP16rm : Pseudo<(outs), (ins GR16:$src1, memsrc:$src2),
+ "cmp.w\t{$src1, $src2}",
+ [(MSP430cmp GR16:$src1, (load addr:$src2)), (implicit SRW)]>;
+
+def CMP8mr : Pseudo<(outs), (ins memsrc:$src1, GR8:$src2),
+ "cmp.b\t{$src1, $src2}",
+ [(MSP430cmp (load addr:$src1), GR8:$src2), (implicit SRW)]>;
+def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2),
+ "cmp.w\t{$src1, $src2}",
+ [(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>;
+
+def CMP8mi0 : Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.b\t{$src1, #0}",
+ [(MSP430cmp (load addr:$src1), (i8 0)), (implicit SRW)]>;
+def CMP16mi0: Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.w\t{$src1, #0}",
+ [(MSP430cmp (load addr:$src1), (i16 0)), (implicit SRW)]>;
+def CMP8mi1 : Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.b\t{$src1, #1}",
+ [(MSP430cmp (load addr:$src1), (i8 1)), (implicit SRW)]>;
+def CMP16mi1: Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.w\t{$src1, #1}",
+ [(MSP430cmp (load addr:$src1), (i16 1)), (implicit SRW)]>;
+def CMP8mi2 : Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.b\t{$src1, #2}",
+ [(MSP430cmp (load addr:$src1), (i8 2)), (implicit SRW)]>;
+def CMP16mi2: Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.w\t{$src1, #2}",
+ [(MSP430cmp (load addr:$src1), (i16 2)), (implicit SRW)]>;
+def CMP8mi4 : Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.b\t{$src1, #4}",
+ [(MSP430cmp (load addr:$src1), (i8 4)), (implicit SRW)]>;
+def CMP16mi4: Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.w\t{$src1, #4}",
+ [(MSP430cmp (load addr:$src1), (i16 4)), (implicit SRW)]>;
+def CMP8mi8 : Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.b\t{$src1, #8}",
+ [(MSP430cmp (load addr:$src1), (i8 8)), (implicit SRW)]>;
+def CMP16mi8: Pseudo<(outs), (ins memsrc:$src1),
+ "cmp.w\t{$src1, #8}",
+ [(MSP430cmp (load addr:$src1), (i16 8)), (implicit SRW)]>;
+
+} // Defs = [SRW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+
+// extload
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+
+// anyext
+def : Pat<(anyext GR8:$src), (MOVZX16rr8 GR8:$src)>;
+
+// truncs
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, subreg_8bit)>;
+
+// GlobalAddress, ExternalSymbol
+def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>;
+def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>;
+
+def : Pat<(add GR16:$src1, (MSP430Wrapper tglobaladdr :$src2)),
+ (ADD16ri GR16:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR16:$src1, (MSP430Wrapper texternalsym:$src2)),
+ (ADD16ri GR16:$src1, texternalsym:$src2)>;
+
+def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV16mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i16 (MSP430Wrapper texternalsym:$src)), addr:$dst),
+ (MOV16mi addr:$dst, texternalsym:$src)>;
+
+// calls
+def : Pat<(MSP430call (i16 tglobaladdr:$dst)),
+ (CALLi tglobaladdr:$dst)>;
+def : Pat<(MSP430call (i16 texternalsym:$dst)),
+ (CALLi texternalsym:$dst)>;
+
+// add and sub always produce carry
+def : Pat<(addc GR16:$src1, GR16:$src2),
+ (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(addc GR16:$src1, (load addr:$src2)),
+ (ADD16rm GR16:$src1, addr:$src2)>;
+def : Pat<(addc GR16:$src1, imm:$src2),
+ (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(store (addc (load addr:$dst), GR16:$src), addr:$dst),
+ (ADD16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (addc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (ADD16mm addr:$dst, addr:$src)>;
+
+def : Pat<(addc GR8:$src1, GR8:$src2),
+ (ADD8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(addc GR8:$src1, (load addr:$src2)),
+ (ADD8rm GR8:$src1, addr:$src2)>;
+def : Pat<(addc GR8:$src1, imm:$src2),
+ (ADD8ri GR8:$src1, imm:$src2)>;
+def : Pat<(store (addc (load addr:$dst), GR8:$src), addr:$dst),
+ (ADD8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (addc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (ADD8mm addr:$dst, addr:$src)>;
+
+def : Pat<(subc GR16:$src1, GR16:$src2),
+ (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(subc GR16:$src1, (load addr:$src2)),
+ (SUB16rm GR16:$src1, addr:$src2)>;
+def : Pat<(subc GR16:$src1, imm:$src2),
+ (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(store (subc (load addr:$dst), GR16:$src), addr:$dst),
+ (SUB16mr addr:$dst, GR16:$src)>;
+def : Pat<(store (subc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
+ (SUB16mm addr:$dst, addr:$src)>;
+
+def : Pat<(subc GR8:$src1, GR8:$src2),
+ (SUB8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(subc GR8:$src1, (load addr:$src2)),
+ (SUB8rm GR8:$src1, addr:$src2)>;
+def : Pat<(subc GR8:$src1, imm:$src2),
+ (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst),
+ (SUB8mr addr:$dst, GR8:$src)>;
+def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
+ (SUB8mm addr:$dst, addr:$src)>;
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
new file mode 100644
index 000000000000..b94d7e44cace
--- /dev/null
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -0,0 +1,39 @@
+//===- MSP430MachineFunctionInfo.h - MSP430 machine function info -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares MSP430-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430MACHINEFUNCTIONINFO_H
+#define MSP430MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// MSP430MachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private MSP430 target-specific information for each MachineFunction.
+class MSP430MachineFunctionInfo : public MachineFunctionInfo {
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+public:
+ MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
+
+ MSP430MachineFunctionInfo(MachineFunction &MF) : CalleeSavedFrameSize(0) {}
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
new file mode 100644
index 000000000000..ef6f99756c2e
--- /dev/null
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -0,0 +1,355 @@
+//===- MSP430RegisterInfo.cpp - MSP430 Register Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msp430-reg-info"
+
+#include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "MSP430RegisterInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+
+using namespace llvm;
+
+// FIXME: Provide proper call frame setup / destroy opcodes.
+MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : MSP430GenRegisterInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
+ TM(tm), TII(tii) {
+ StackAlign = TM.getFrameInfo()->getStackAlignment();
+}
+
+const unsigned*
+MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = {
+ MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ 0
+ };
+
+ return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ 0
+ };
+
+ return CalleeSavedRegClasses;
+}
+
+BitVector
+MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+
+ // Mark 4 special registers as reserved.
+ Reserved.set(MSP430::PCW);
+ Reserved.set(MSP430::SPW);
+ Reserved.set(MSP430::SRW);
+ Reserved.set(MSP430::CGW);
+
+ // Mark frame pointer as reserved if needed.
+ if (hasFP(MF))
+ Reserved.set(MSP430::FPW);
+
+ return Reserved;
+}
+
+const TargetRegisterClass* MSP430RegisterInfo::getPointerRegClass() const {
+ return &MSP430::GR16RegClass;
+}
+
+
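+// A frame pointer is needed when frame pointer elimination is disabled or when
+// the function has variable-sized stack objects.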
+bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const {
+ return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+bool MSP430RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void MSP430RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+    // If the stack pointer can be changed after the prologue, turn the
+    // adjcallstackdown instruction into a 'sub SPW, <amt>' and the
+    // adjcallstackup instruction into an 'add SPW, <amt>'.
+ // TODO: consider using push / pop instead of sub + store / add
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
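+ // (For example, with a StackAlign of 2, an Amount of 5 bytes becomes 6.)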
+ Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
+
+ MachineInstr *New = 0;
+ if (Old->getOpcode() == getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == getCallFrameDestroyOpcode());
+ // factor out the amount the callee already popped.
+ uint64_t CalleeAmt = Old->getOperand(1).getImm();
+ Amount -= CalleeAmt;
+ if (Amount)
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(Amount);
+ }
+
+ if (New) {
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+ } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back.
+ if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
+ MachineInstr *Old = I;
+ MachineInstr *New =
+ BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri),
+ MSP430::SPW).addReg(MSP430::SPW).addImm(CalleeAmt);
+ // The SRW implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void
+MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ unsigned BasePtr = (hasFP(MF) ? MSP430::FPW : MSP430::SPW);
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ // Skip the saved PC
+ Offset += 2;
+
+ if (!hasFP(MF))
+ Offset += MF.getFrameInfo()->getStackSize();
+ else
+ Offset += 2; // Skip the saved FPW
+
+ // Fold imm into offset
+ Offset += MI.getOperand(i+1).getImm();
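+ // At this point Offset is the final displacement of the slot from BasePtr,
+ // accounting for the saved return address (and the saved FPW, if any).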
+
+ if (MI.getOpcode() == MSP430::ADD16ri) {
+ // This is actually a "load effective address" of the stack slot. Since we
+ // only have two-address instructions, we need to expand it into a
+ // mov + add pair.
+
+ MI.setDesc(TII.get(MSP430::MOV16rr));
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+ if (Offset == 0)
+ return;
+
+ // We need to materialize the offset via add instruction.
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (Offset < 0)
+ BuildMI(MBB, next(II), dl, TII.get(MSP430::SUB16ri), DstReg)
+ .addReg(DstReg).addImm(-Offset);
+ else
+ BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
+ .addReg(DstReg).addImm(Offset);
+
+ return;
+ }
+
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+}
+
+void
+MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
+ const {
+ // Create a frame entry for the FPW register that must be saved.
+ if (hasFP(MF)) {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4);
+ assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ "Slot for FPW register must be last in order to be found!");
+ FrameIdx = 0;
+ }
+}
+
+
+void MSP430RegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
+ DebugLoc::getUnknownLoc());
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+
+ uint64_t NumBytes = 0;
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize();
+
+ // Get the offset of the stack slot for the FPW register, which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save FPW into the appropriate stack slot...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
+ .addReg(MSP430::FPW, RegState::Kill);
+
+ // Update FPW with the new base value...
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
+ .addReg(MSP430::SPW);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(MSP430::FPW);
+
+ } else
+ NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ if (NumBytes) { // adjust stack pointer: SPW -= numbytes
+ // If there is an SUB16ri of SPW immediately before this instruction, merge
+ // the two.
+ //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
+ // If there is an ADD16ri or SUB16ri of SPW immediately after this
+ // instruction, merge the two instructions.
+ // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);
+
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ switch (RetOpcode) {
+ case MSP430::RET: break; // These are ok
+ default:
+ assert(0 && "Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+ unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - 2;
+ NumBytes = FrameSize - CSSize;
+
+ // pop FPW.
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
+ } else
+ NumBytes = StackSize - CSSize;
+
+ // Skip the callee-saved pop instructions.
+ MachineBasicBlock::iterator LastCSPop = MBBI;
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
+ break;
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD16ri or SUB16ri of SPW immediately before this
+ // instruction, merge the two instructions.
+ //if (NumBytes || MFI->hasVarSizedObjects())
+ // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ if (MFI->hasVarSizedObjects()) {
+ assert(0 && "Not implemented yet!");
+ } else {
+ // adjust stack pointer back: SPW += numbytes
+ if (NumBytes) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
+ .addReg(MSP430::SPW).addImm(NumBytes);
+ // The SRW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ }
+}
+
+unsigned MSP430RegisterInfo::getRARegister() const {
+ return MSP430::PCW;
+}
+
+unsigned MSP430RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? MSP430::FPW : MSP430::SPW;
+}
+
+int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "Not implemented yet!");
+}
+
+#include "MSP430GenRegisterInfo.inc"
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
new file mode 100644
index 000000000000..a210e36e001d
--- /dev/null
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -0,0 +1,70 @@
+//===- MSP430RegisterInfo.h - MSP430 Register Information Impl --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430REGISTERINFO_H
+#define LLVM_TARGET_MSP430REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "MSP430GenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+class MSP430TargetMachine;
+
+struct MSP430RegisterInfo : public MSP430GenRegisterInfo {
+private:
+ MSP430TargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ /// StackAlign - Default stack alignment.
+ ///
+ unsigned StackAlign;
+public:
+ MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+ const TargetRegisterClass* getPointerRegClass() const;
+
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ //! Get DWARF debugging register number
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_MSP430REGISTERINFO_H
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
new file mode 100644
index 000000000000..4078626ea2dd
--- /dev/null
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -0,0 +1,122 @@
+//===- MSP430RegisterInfo.td - MSP430 Register defs ----------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MSP430 register file
+//===----------------------------------------------------------------------===//
+
+class MSP430Reg<bits<4> num, string n> : Register<n> {
+ field bits<4> Num = num;
+ let Namespace = "MSP430";
+}
+
+class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ field bits<4> Num = num;
+ let Namespace = "MSP430";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+def PCB : MSP430Reg<0, "r0">;
+def SPB : MSP430Reg<1, "r1">;
+def SRB : MSP430Reg<2, "r2">;
+def CGB : MSP430Reg<3, "r3">;
+def FPB : MSP430Reg<4, "r4">;
+def R5B : MSP430Reg<5, "r5">;
+def R6B : MSP430Reg<6, "r6">;
+def R7B : MSP430Reg<7, "r7">;
+def R8B : MSP430Reg<8, "r8">;
+def R9B : MSP430Reg<9, "r9">;
+def R10B : MSP430Reg<10, "r10">;
+def R11B : MSP430Reg<11, "r11">;
+def R12B : MSP430Reg<12, "r12">;
+def R13B : MSP430Reg<13, "r13">;
+def R14B : MSP430Reg<14, "r14">;
+def R15B : MSP430Reg<15, "r15">;
+
+def PCW : MSP430RegWithSubregs<0, "r0", [PCB]>;
+def SPW : MSP430RegWithSubregs<1, "r1", [SPB]>;
+def SRW : MSP430RegWithSubregs<2, "r2", [SRB]>;
+def CGW : MSP430RegWithSubregs<3, "r3", [CGB]>;
+def FPW : MSP430RegWithSubregs<4, "r4", [FPB]>;
+def R5W : MSP430RegWithSubregs<5, "r5", [R5B]>;
+def R6W : MSP430RegWithSubregs<6, "r6", [R6B]>;
+def R7W : MSP430RegWithSubregs<7, "r7", [R7B]>;
+def R8W : MSP430RegWithSubregs<8, "r8", [R8B]>;
+def R9W : MSP430RegWithSubregs<9, "r9", [R9B]>;
+def R10W : MSP430RegWithSubregs<10, "r10", [R10B]>;
+def R11W : MSP430RegWithSubregs<11, "r11", [R11B]>;
+def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>;
+def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>;
+def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>;
+def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
+
+def : SubRegSet<1, [PCW, SPW, SRW, CGW, FPW,
+ R5W, R6W, R7W, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
+ [PCB, SPB, SRB, CGB, FPB,
+ R5B, R6B, R7B, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+
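+// Sub-register index 1 maps each 16-bit register to its 8-bit counterpart
+// defined above; it is used by the EXTRACT_SUBREG / SUBREG_TO_REG patterns in
+// MSP430InstrInfo.td.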
+def subreg_8bit : PatLeaf<(i32 1)>;
+
+def GR8 : RegisterClass<"MSP430", [i8], 8,
+ // Volatile registers
+ [R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B,
+ // Frame pointer, sometimes allocable
+ FPB,
+ // Volatile, but not allocable
+ PCB, SPB, SRB, CGB]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR8Class::iterator
+ GR8Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ // Depending on whether the function uses a frame pointer or not, the
+ // last 5 or 4 registers in the list above are reserved.
+ if (RI->hasFP(MF))
+ return end()-5;
+ else
+ return end()-4;
+ }
+ }];
+}
+
+def GR16 : RegisterClass<"MSP430", [i16], 16,
+ // Volatile registers
+ [R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W,
+ // Frame pointer, sometimes allocable
+ FPW,
+ // Volatile, but not allocable
+ PCW, SPW, SRW, CGW]>
+{
+ let SubRegClassList = [GR8];
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR16Class::iterator
+ GR16Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ // Depending on whether the function uses a frame pointer or not, the
+ // last 5 or 4 registers in the list above are reserved.
+ if (RI->hasFP(MF))
+ return end()-5;
+ else
+ return end()-4;
+ }
+ }];
+}
+
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
new file mode 100644
index 000000000000..ef9e10339bc3
--- /dev/null
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -0,0 +1,27 @@
+//===- MSP430Subtarget.cpp - MSP430 Subtarget Information ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MSP430 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430Subtarget.h"
+#include "MSP430.h"
+#include "MSP430GenSubtarget.inc"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+MSP430Subtarget::MSP430Subtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS) {
+ std::string CPU = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
new file mode 100644
index 000000000000..96c8108b71bc
--- /dev/null
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -0,0 +1,41 @@
+//====-- MSP430Subtarget.h - Define Subtarget for the MSP430 ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSP430 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MSP430_SUBTARGET_H
+#define LLVM_TARGET_MSP430_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+class TargetMachine;
+
+class MSP430Subtarget : public TargetSubtarget {
+ bool ExtendedInsts;
+public:
+ /// This constructor initializes the data members to match those
+ /// of the specified module.
+ ///
+ MSP430Subtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_MSP430_SUBTARGET_H
diff --git a/lib/Target/MSP430/MSP430TargetAsmInfo.cpp b/lib/Target/MSP430/MSP430TargetAsmInfo.cpp
new file mode 100644
index 000000000000..ab181de13f94
--- /dev/null
+++ b/lib/Target/MSP430/MSP430TargetAsmInfo.cpp
@@ -0,0 +1,22 @@
+//===-- MSP430TargetAsmInfo.cpp - MSP430 asm properties -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MSP430TargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430TargetAsmInfo.h"
+#include "MSP430TargetMachine.h"
+
+using namespace llvm;
+
+MSP430TargetAsmInfo::MSP430TargetAsmInfo(const MSP430TargetMachine &TM)
+ : ELFTargetAsmInfo(TM) {
+ AlignmentIsInBytes = false;
+}
diff --git a/lib/Target/MSP430/MSP430TargetAsmInfo.h b/lib/Target/MSP430/MSP430TargetAsmInfo.h
new file mode 100644
index 000000000000..b58d5c9c764c
--- /dev/null
+++ b/lib/Target/MSP430/MSP430TargetAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- MSP430TargetAsmInfo.h - MSP430 asm properties -------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MSP430TargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430TARGETASMINFO_H
+#define MSP430TARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class MSP430TargetMachine;
+
+ struct MSP430TargetAsmInfo : public ELFTargetAsmInfo {
+ explicit MSP430TargetAsmInfo(const MSP430TargetMachine &TM);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
new file mode 100644
index 000000000000..78869463f3a2
--- /dev/null
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -0,0 +1,76 @@
+//===-- MSP430TargetMachine.cpp - Define TargetMachine for MSP430 ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the MSP430 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430.h"
+#include "MSP430TargetAsmInfo.h"
+#include "MSP430TargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+
+using namespace llvm;
+
+/// MSP430TargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int MSP430TargetMachineModule;
+int MSP430TargetMachineModule = 0;
+
+
+// Register the targets
+static RegisterTarget<MSP430TargetMachine>
+X("msp430", "MSP430 [experimental]");
+
+MSP430TargetMachine::MSP430TargetMachine(const Module &M,
+ const std::string &FS) :
+ Subtarget(*this, M, FS),
+ // FIXME: Check TargetData string.
+ DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"),
+ InstrInfo(*this), TLInfo(*this),
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { }
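+
+// Note: the TargetData string used above means little-endian ("e"), 16-bit
+// pointers with 8-bit ABI and preferred alignment ("p:16:8:8"), and 8-bit
+// alignment for the i8 / i16 / i32 integer types (alignments are given in bits).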
+
+const TargetAsmInfo *MSP430TargetMachine::createTargetAsmInfo() const {
+ return new MSP430TargetAsmInfo(*this);
+}
+
+bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createMSP430ISelDag(*this, OptLevel));
+ return false;
+}
+
+bool MSP430TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ // Output assembly language.
+ PM.add(createMSP430CodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
+
+unsigned MSP430TargetMachine::getModuleMatchQuality(const Module &M) {
+ std::string TT = M.getTargetTriple();
+
+ // We strongly match msp430
+ if (TT.size() >= 6 && TT[0] == 'm' && TT[1] == 's' && TT[2] == 'p' &&
+ TT[3] == '4' && TT[4] == '3' && TT[5] == '0')
+ return 20;
+
+ return 0;
+}
+
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
new file mode 100644
index 000000000000..d9ffa2b5ac8f
--- /dev/null
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -0,0 +1,68 @@
+//==-- MSP430TargetMachine.h - Define TargetMachine for MSP430 ---*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MSP430 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H
+#define LLVM_TARGET_MSP430_TARGETMACHINE_H
+
+#include "MSP430InstrInfo.h"
+#include "MSP430ISelLowering.h"
+#include "MSP430RegisterInfo.h"
+#include "MSP430Subtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// MSP430TargetMachine
+///
+class MSP430TargetMachine : public LLVMTargetMachine {
+ MSP430Subtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MSP430InstrInfo InstrInfo;
+ MSP430TargetLowering TLInfo;
+
+ // MSP430 does not have a special call stack frame, therefore there is no
+ // MSP430-specific FrameInfo class.
+ TargetFrameInfo FrameInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ MSP430TargetMachine(const Module &M, const std::string &FS);
+
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout;}
+ virtual const MSP430Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual MSP430TargetLowering *getTargetLowering() const {
+ return const_cast<MSP430TargetLowering*>(&TLInfo);
+ }
+
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel, bool Verbose,
+ raw_ostream &Out);
+ static unsigned getModuleMatchQuality(const Module &M);
+}; // MSP430TargetMachine.
+
+} // end namespace llvm
+
+#endif // LLVM_TARGET_MSP430_TARGETMACHINE_H
diff --git a/lib/Target/MSP430/Makefile b/lib/Target/MSP430/Makefile
new file mode 100644
index 000000000000..45cb3aa45b85
--- /dev/null
+++ b/lib/Target/MSP430/Makefile
@@ -0,0 +1,21 @@
+##===- lib/Target/MSP430/Makefile --------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMMSP430
+TARGET = MSP430
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \
+ MSP430GenRegisterInfo.inc MSP430GenInstrNames.inc \
+ MSP430GenInstrInfo.inc MSP430GenAsmWriter.inc \
+ MSP430GenDAGISel.inc MSP430GenCallingConv.inc \
+ MSP430GenSubtarget.inc
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/MSP430/README.txt b/lib/Target/MSP430/README.txt
new file mode 100644
index 000000000000..b14e93d84d6a
--- /dev/null
+++ b/lib/Target/MSP430/README.txt
@@ -0,0 +1,42 @@
+//===---------------------------------------------------------------------===//
+// MSP430 backend.
+//===---------------------------------------------------------------------===//
+
+DISCLAIMER: This backend should be considered highly experimental. I have never
+seen nor worked with this MCU; all information was gathered from the datasheet
+only. The original intention of making this backend was to write documentation
+of the form "How to write a backend for dummies" :) These notes will hopefully
+be available pretty soon.
+
+Some things are incomplete / not implemented yet (this list is surely not
+complete either):
+
+0. Implement asmprinting for variables :)
+
+1. Verify how implicit zext with 8-bit operands is handled (this might
+currently be modelled in an improper way - do we need to mark the superreg as
+def for every 8-bit instruction?).
+
+2. Libcalls: multiplication, division, remainder. Note that the calling
+convention for libcalls is incompatible with the libcall calling convention of
+msp430-gcc (those cannot be used anyway due to license restrictions).
+
+3. Implement multiplication / division by constant (dag combiner hook?).
+
+4. Implement non-constant shifts.
+
+5. Implement varargs stuff.
+
+6. Verify and fix (if needed) how things interact with i32 / i64.
+
+7. Implement floating point stuff (softfp?)
+
+8. Implement instruction encoding for (possible) direct code emission in the
+future.
+
+9. Since almost all instructions set flags, implement brcond / select in a
+better way (currently they emit an explicit comparison).
+
+10. Handle immediates in comparisons in a better way (see the comment in
+MSP430InstrInfo.td).
+
+11. Implement hooks for better memory op folding, etc.
diff --git a/lib/Target/Makefile b/lib/Target/Makefile
new file mode 100644
index 000000000000..50a360f1f868
--- /dev/null
+++ b/lib/Target/Makefile
@@ -0,0 +1,20 @@
+#===- lib/Target/Makefile ----------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMTarget
+BUILD_ARCHIVE = 1
+
+# We include this early so we can access the value of TARGETS_TO_BUILD as the
+# value for PARALLEL_DIRS which must be set before Makefile.rules is included
+include $(LEVEL)/Makefile.config
+
+PARALLEL_DIRS := $(TARGETS_TO_BUILD)
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..6a868c2fc78c
--- /dev/null
+++ b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,12 @@
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_partially_linked_object(LLVMMipsAsmPrinter
+ MipsAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMMipsCodeGen n)
+
+add_dependencies(LLVMMipsAsmPrinter ${n})
diff --git a/lib/Target/Mips/AsmPrinter/Makefile b/lib/Target/Mips/AsmPrinter/Makefile
new file mode 100644
index 000000000000..a2fecf44e8e1
--- /dev/null
+++ b/lib/Target/Mips/AsmPrinter/Makefile
@@ -0,0 +1,17 @@
+##===- lib/Target/Mips/AsmPrinter/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMipsAsmPrinter
+
+# Hack: we need to include 'main' Mips target directory to grab
+# private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
new file mode 100644
index 000000000000..dfb62382e75d
--- /dev/null
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -0,0 +1,580 @@
+//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MIPS assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-asm-printer"
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsInstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class VISIBILITY_HIDDEN MipsAsmPrinter : public AsmPrinter {
+ const MipsSubtarget *Subtarget;
+ public:
+ explicit MipsAsmPrinter(raw_ostream &O, MipsTargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Mips Assembly Printer";
+ }
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printUnsignedImm(const MachineInstr *MI, int opNum);
+ void printMemOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printFCCOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ void printSavedRegsBitmask(MachineFunction &MF);
+ void printHex32(unsigned int Value);
+
+ const char *emitCurrentABIString(void);
+ void emitFunctionStart(MachineFunction &MF);
+ void emitFunctionEnd(MachineFunction &MF);
+ void emitFrameDirective(MachineFunction &MF);
+
+ bool printInstruction(const MachineInstr *MI); // autogenerated.
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ };
+} // end of anonymous namespace
+
+#include "MipsGenAsmWriter.inc"
+
+/// createMipsCodePrinterPass - Returns a pass that prints the MIPS
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+FunctionPass *llvm::createMipsCodePrinterPass(raw_ostream &o,
+ MipsTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new MipsAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Mips Asm Directives
+//
+// -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+// Describe the stack frame.
+//
+// -- Mask directives "(f)mask bitmask, offset"
+// Tells the assembler which registers are saved and where.
+// bitmask - contains a little endian bitmask indicating which registers are
+//           saved in the function prologue (e.g. with a 0x80000000 mask, the
+//           assembler knows that register 31 (RA) is saved in the prologue).
+// offset  - the position, before the stack pointer subtraction, indicating
+//           where the first register saved in the prologue is located (e.g.
+//           with a -8 offset; see the example below).
+//
+// Consider the following function prologue:
+//
+// .frame $fp,48,$ra
+// .mask 0xc0000000,-8
+// addiu $sp, $sp, -48
+// sw $ra, 40($sp)
+// sw $fp, 36($sp)
+//
+// With a 0xc0000000 mask, the assembler knows that registers 31 (RA) and
+// 30 (FP) are saved in the prologue. As the save order in the prologue is
+// from left to right, RA is saved first. A -8 offset means that after the
+// stack pointer subtraction, the first register in the mask (RA) will be
+// saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mask directives
+//===----------------------------------------------------------------------===//
+
+// Create a bitmask with all callee-saved registers for CPU or Floating Point
+// registers. For CPU registers, consider RA, GP and FP for saving if necessary.
+void MipsAsmPrinter::
+printSavedRegsBitmask(MachineFunction &MF)
+{
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ // CPU and FPU Saved Registers Bitmasks
+ unsigned int CPUBitmask = 0;
+ unsigned int FPUBitmask = 0;
+
+ // Set the CPU and FPU Bitmasks
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(CSI[i].getReg());
+ if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
+ CPUBitmask |= (1 << RegNum);
+ else
+ FPUBitmask |= (1 << RegNum);
+ }
+
+ // Return Address and Frame registers must also be set in CPUBitmask.
+ if (RI.hasFP(MF))
+ CPUBitmask |= (1 << MipsRegisterInfo::
+ getRegisterNumbering(RI.getFrameRegister(MF)));
+
+ if (MF.getFrameInfo()->hasCalls())
+ CPUBitmask |= (1 << MipsRegisterInfo::
+ getRegisterNumbering(RI.getRARegister()));
+
+ // Print CPUBitmask
+ O << "\t.mask \t"; printHex32(CPUBitmask); O << ','
+ << MipsFI->getCPUTopSavedRegOff() << '\n';
+
+ // Print FPUBitmask
+ O << "\t.fmask\t"; printHex32(FPUBitmask); O << ","
+ << MipsFI->getFPUTopSavedRegOff() << '\n';
+}
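+
+// Illustrative example (values are hypothetical, not from the sources): for a
+// function whose prologue saves only RA and FP, with getCPUTopSavedRegOff()
+// returning -8 and no FPU spills, the code above would emit something like:
+//
+//   .mask  0xC0000000,-8
+//   .fmask 0x00000000,0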
+
+// Print a 32-bit hex number, including all leading zeros.
+void MipsAsmPrinter::
+printHex32(unsigned int Value)
+{
+ O << "0x";
+ for (int i = 7; i >= 0; i--)
+ O << utohexstr( (Value & (0xF << (i*4))) >> (i*4) );
+}
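+
+// For instance (assuming utohexstr prints uppercase digits), printHex32(0x2A)
+// prints "0x0000002A" and printHex32(0x80000000) prints "0x80000000" -- eight
+// nibbles, most significant first.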
+
+//===----------------------------------------------------------------------===//
+// Frame and Set directives
+//===----------------------------------------------------------------------===//
+
+/// Frame Directive
+void MipsAsmPrinter::
+emitFrameDirective(MachineFunction &MF)
+{
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ unsigned stackReg = RI.getFrameRegister(MF);
+ unsigned returnReg = RI.getRARegister();
+ unsigned stackSize = MF.getFrameInfo()->getStackSize();
+
+
+ O << "\t.frame\t" << '$' << LowercaseString(RI.get(stackReg).AsmName)
+ << ',' << stackSize << ','
+ << '$' << LowercaseString(RI.get(returnReg).AsmName)
+ << '\n';
+}
+
+/// Emit Set directives.
+const char * MipsAsmPrinter::
+emitCurrentABIString(void)
+{
+ switch(Subtarget->getTargetABI()) {
+ case MipsSubtarget::O32: return "abi32";
+ case MipsSubtarget::O64: return "abiO64";
+ case MipsSubtarget::N32: return "abiN32";
+ case MipsSubtarget::N64: return "abi64";
+ case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
+ default: break;
+ }
+
+ assert(0 && "Unknown Mips ABI");
+ return NULL;
+}
+
+/// Emit the directives used by GAS on the start of functions
+void MipsAsmPrinter::
+emitFunctionStart(MachineFunction &MF)
+{
+ // Print out the label for the function.
+ const Function *F = MF.getFunction();
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ // Align on a 2^2 = 4 byte boundary.
+ EmitAlignment(2, F);
+
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ O << "\t.ent\t" << CurrentFnName << '\n';
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ if ((TAI->hasDotTypeDotSizeDirective()) && Subtarget->isLinux())
+ O << "\t.type\t" << CurrentFnName << ", @function\n";
+
+ O << CurrentFnName << ":\n";
+
+ emitFrameDirective(MF);
+ printSavedRegsBitmask(MF);
+
+ O << '\n';
+}
+
+/// Emit the directives used by GAS on the end of functions
+void MipsAsmPrinter::
+emitFunctionEnd(MachineFunction &MF)
+{
+ // There are instructions for these macros, but they must
+ // always be at the function end, and we can't emit them
+ // earlier without breaking the BB logic.
+ O << "\t.set\tmacro\n";
+ O << "\t.set\treorder\n";
+
+ O << "\t.end\t" << CurrentFnName << '\n';
+ if (TAI->hasDotTypeDotSizeDirective() && !Subtarget->isLinux())
+ O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+bool MipsAsmPrinter::
+runOnMachineFunction(MachineFunction &MF)
+{
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out jump tables referenced by the function
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ O << "\n\n";
+
+ // Emit the function start directives
+ emitFunctionStart(MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printInstruction(II);
+ ++EmittedInsts;
+ }
+
+ // Each Basic Block is separated by a newline
+ O << '\n';
+ }
+
+ // Emit function end directives
+ emitFunctionEnd(MF);
+
+ // We didn't modify anything.
+ return false;
+}
+
+// Print out an operand for an inline asm expression.
+bool MipsAsmPrinter::
+PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode)
+{
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+void MipsAsmPrinter::
+printOperand(const MachineInstr *MI, int opNum)
+{
+ const MachineOperand &MO = MI->getOperand(opNum);
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ bool closeP = false;
+ bool isPIC = (TM.getRelocationModel() == Reloc::PIC_);
+ bool isCodeLarge = (TM.getCodeModel() == CodeModel::Large);
+
+ // %hi and %lo are used by mips gas to load global addresses in
+ // static code. %got is used to load global addresses when
+ // using PIC_. %call16 is used to load direct call targets
+ // with PIC_ and small code size. %call_lo and %call_hi load
+ // direct call targets with PIC_ and large code size.
+ if (MI->getOpcode() == Mips::LUi && !MO.isReg() && !MO.isImm()) {
+ if ((isPIC) && (isCodeLarge))
+ O << "%call_hi(";
+ else
+ O << "%hi(";
+ closeP = true;
+ } else if ((MI->getOpcode() == Mips::ADDiu) && !MO.isReg() && !MO.isImm()) {
+ const MachineOperand &firstMO = MI->getOperand(opNum-1);
+ if (firstMO.getReg() == Mips::GP)
+ O << "%gp_rel(";
+ else
+ O << "%lo(";
+ closeP = true;
+ } else if ((isPIC) && (MI->getOpcode() == Mips::LW) &&
+ (!MO.isReg()) && (!MO.isImm())) {
+ const MachineOperand &firstMO = MI->getOperand(opNum-1);
+ const MachineOperand &lastMO = MI->getOperand(opNum+1);
+ if ((firstMO.isReg()) && (lastMO.isReg())) {
+ if ((firstMO.getReg() == Mips::T9) && (lastMO.getReg() == Mips::GP)
+ && (!isCodeLarge))
+ O << "%call16(";
+ else if ((firstMO.getReg() != Mips::T9) && (lastMO.getReg() == Mips::GP))
+ O << "%got(";
+ else if ((firstMO.getReg() == Mips::T9) && (lastMO.getReg() != Mips::GP)
+ && (isCodeLarge))
+ O << "%call_lo(";
+ closeP = true;
+ }
+ }
+
+ switch (MO.getType())
+ {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ O << '$' << LowercaseString (RI.get(MO.getReg()).AsmName);
+ else
+ O << '$' << MO.getReg();
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (short int)MO.getImm();
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ {
+ const GlobalValue *GV = MO.getGlobal();
+ O << Mang->getValueName(GV);
+ }
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ break;
+
+ default:
+ O << "<unknown operand type>"; abort (); break;
+ }
+
+ if (closeP) O << ")";
+}
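+
+// As an illustration (symbol and register names are hypothetical), the
+// relocation prefixes printed above show up in the final assembly as, e.g.:
+//
+//   lui   $2, %hi(sym)             # static code
+//   addiu $2, $2, %lo(sym)
+//   lw    $25, %call16(fun)($gp)   # PIC, small code model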
+
+void MipsAsmPrinter::
+printUnsignedImm(const MachineInstr *MI, int opNum)
+{
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.getType() == MachineOperand::MO_Immediate)
+ O << (unsigned short int)MO.getImm();
+ else
+ printOperand(MI, opNum);
+}
+
+void MipsAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier)
+{
+ // When stack locations are used by instructions other than loads/stores,
+ // print them the same way as for normal 3-operand instructions.
+ if (Modifier && !strcmp(Modifier, "stackloc")) {
+ printOperand(MI, opNum+1);
+ O << ", ";
+ printOperand(MI, opNum);
+ return;
+ }
+
+ // Load/Store memory operands -- imm($reg)
+ // When targeting PIC, the call target is loaded with a
+ // pattern like lw $25,%call16($28).
+ printOperand(MI, opNum);
+ O << "(";
+ printOperand(MI, opNum+1);
+ O << ")";
+}
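+
+// For example, a load whose operands are (imm=40, reg=$sp) is printed as
+// "40($sp)"; with the "stackloc" modifier the same operands are printed as
+// "$sp, 40" instead (register first, then offset).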
+
+void MipsAsmPrinter::
+printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier)
+{
+ const MachineOperand& MO = MI->getOperand(opNum);
+ O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
+}
+
+bool MipsAsmPrinter::
+doInitialization(Module &M)
+{
+ Mang = new Mangler(M, "", TAI->getPrivateGlobalPrefix());
+
+ // Tell the assembler which ABI we are using
+ O << "\t.section .mdebug." << emitCurrentABIString() << '\n';
+
+ // TODO: handle O64 ABI
+ if (Subtarget->isABI_EABI())
+ O << "\t.section .gcc_compiled_long" <<
+ (Subtarget->isGP32bit() ? "32" : "64") << '\n';
+
+ // return to previous section
+ O << "\t.previous" << '\n';
+
+ return false; // success
+}
+
+void MipsAsmPrinter::
+printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+ return; // External globals require no code.
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ O << "\n\n";
+ std::string name = Mang->getValueName(GVar);
+ Constant *C = GVar->getInitializer();
+ const Type *CTy = C->getType();
+ unsigned Size = TD->getTypeAllocSize(CTy);
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ bool printSizeAndType = true;
+
+ // A data structure or array is aligned in memory to the largest
+ // alignment boundary required by any data type inside it (this matches
+ // the Preferred Type Alignment). For integral types, the alignment is
+ // the type size.
+ unsigned Align;
+ if (CTy->getTypeID() == Type::IntegerTyID ||
+ CTy->getTypeID() == Type::VoidTyID) {
+ assert(!(Size & (Size-1)) && "Alignment is not a power of two!");
+ Align = Log2_32(Size);
+ } else
+ Align = TD->getPreferredTypeAlignmentShift(CTy);
+
+ printVisibility(name, GVar->getVisibility());
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && !GVar->hasSection()) {
+ if (!GVar->isThreadLocal() &&
+ (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasLocalLinkage())
+ O << "\t.local\t" << name << '\n';
+
+ O << TAI->getCOMMDirective() << name << ',' << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (1 << Align);
+
+ O << '\n';
+ return;
+ }
+ }
+ switch (GVar->getLinkage()) {
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ // FIXME: Verify correct for weak.
+ // Nonnull linkonce -> weak
+ O << "\t.weak " << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of their name
+ // or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << TAI->getGlobalDirective() << name << '\n';
+ // Fall Through
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ if (CVA && CVA->isCString())
+ printSizeAndType = false;
+ break;
+ case GlobalValue::GhostLinkage:
+ cerr << "Should not have any unmaterialized functions!\n";
+ abort();
+ case GlobalValue::DLLImportLinkage:
+ cerr << "DLLImport linkage is not supported by this target!\n";
+ abort();
+ case GlobalValue::DLLExportLinkage:
+ cerr << "DLLExport linkage is not supported by this target!\n";
+ abort();
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, GVar);
+
+ if (TAI->hasDotTypeDotSizeDirective() && printSizeAndType) {
+ O << "\t.type " << name << ",@object\n";
+ O << "\t.size " << name << ',' << Size << '\n';
+ }
+
+ O << name << ":\n";
+ EmitGlobalConstant(C);
+}
+
+bool MipsAsmPrinter::
+doFinalization(Module &M)
+{
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I)
+ printModuleLevelGV(I);
+
+ O << '\n';
+
+ return AsmPrinter::doFinalization(M);
+}
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
new file mode 100644
index 000000000000..70c7a51c2850
--- /dev/null
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -0,0 +1,22 @@
+set(LLVM_TARGET_DEFINITIONS Mips.td)
+
+tablegen(MipsGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(MipsGenRegisterNames.inc -gen-register-enums)
+tablegen(MipsGenRegisterInfo.inc -gen-register-desc)
+tablegen(MipsGenInstrNames.inc -gen-instr-enums)
+tablegen(MipsGenInstrInfo.inc -gen-instr-desc)
+tablegen(MipsGenAsmWriter.inc -gen-asm-writer)
+tablegen(MipsGenDAGISel.inc -gen-dag-isel)
+tablegen(MipsGenCallingConv.inc -gen-callingconv)
+tablegen(MipsGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(MipsCodeGen
+ MipsDelaySlotFiller.cpp
+ MipsInstrInfo.cpp
+ MipsISelDAGToDAG.cpp
+ MipsISelLowering.cpp
+ MipsRegisterInfo.cpp
+ MipsSubtarget.cpp
+ MipsTargetAsmInfo.cpp
+ MipsTargetMachine.cpp
+ )
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
new file mode 100644
index 000000000000..48ab5f994704
--- /dev/null
+++ b/lib/Target/Mips/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/Mips/Makefile ----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMMipsCodeGen
+TARGET = Mips
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \
+ MipsGenRegisterInfo.inc MipsGenInstrNames.inc \
+ MipsGenInstrInfo.inc MipsGenAsmWriter.inc \
+ MipsGenDAGISel.inc MipsGenCallingConv.inc \
+ MipsGenSubtarget.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
new file mode 100644
index 000000000000..0accb4e347ee
--- /dev/null
+++ b/lib/Target/Mips/Mips.h
@@ -0,0 +1,41 @@
+//===-- Mips.h - Top-level interface for Mips representation ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM Mips back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MIPS_H
+#define TARGET_MIPS_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MipsTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class raw_ostream;
+
+ FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+ FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsCodePrinterPass(raw_ostream &OS,
+ MipsTargetMachine &TM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+} // end namespace llvm;
+
+// Defines symbolic names for Mips registers. This defines a mapping from
+// register name to register number.
+#include "MipsGenRegisterNames.inc"
+
+// Defines symbolic names for the Mips instructions.
+#include "MipsGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
new file mode 100644
index 000000000000..79a78d86aef7
--- /dev/null
+++ b/lib/Target/Mips/Mips.td
@@ -0,0 +1,88 @@
+//===- Mips.td - Describe the Mips Target Machine ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the Mips target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MipsRegisterInfo.td"
+include "MipsSchedule.td"
+include "MipsInstrInfo.td"
+include "MipsCallingConv.td"
+
+def MipsInstrInfo : InstrInfo {
+ let TSFlagsFields = [];
+ let TSFlagsShifts = [];
+}
+
+//===----------------------------------------------------------------------===//
+// Mips Subtarget features //
+//===----------------------------------------------------------------------===//
+
+def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
+ "General Purpose Registers are 64-bit wide.">;
+def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
+ "Support 64-bit FP registers.">;
+def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
+ "true", "Only supports single precision float">;
+def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
+ "Mips1 ISA Support">;
+def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
+ "Mips2 ISA Support">;
+def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32",
+ "Enable o32 ABI">;
+def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
+ "Enable eabi ABI">;
+def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
+ "true", "Enable vector FPU instructions.">;
+def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
+ "Enable 'signext in register' instructions.">;
+def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true",
+ "Enable 'conditional move' instructions.">;
+def FeatureMulDivAdd : SubtargetFeature<"muldivadd", "HasMulDivAdd", "true",
+ "Enable 'multiply add/sub' instructions.">;
+def FeatureMinMax : SubtargetFeature<"minmax", "HasMinMax", "true",
+ "Enable 'min/max' instructions.">;
+def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
+ "Enable 'byte/half swap' instructions.">;
+def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
+ "Enable 'count leading bits' instructions.">;
+
+//===----------------------------------------------------------------------===//
+// Mips processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, MipsGenericItineraries, Features>;
+
+def : Proc<"mips1", [FeatureMips1]>;
+def : Proc<"r2000", [FeatureMips1]>;
+def : Proc<"r3000", [FeatureMips1]>;
+
+def : Proc<"mips2", [FeatureMips2]>;
+def : Proc<"r6000", [FeatureMips2]>;
+
+// Allegrex is a 32-bit subset of the r4000, both for integer and fp registers,
+// but much more similar to Mips2 than Mips3. It also contains some
+// Mips32/Mips32r2 instructions and a custom vector fpu processor.
+def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
+ FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd,
+ FeatureMinMax, FeatureSwap, FeatureBitCount]>;
+
+def Mips : Target {
+ let InstructionSet = MipsInstrInfo;
+}
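+
+// Note: the processors and features above can be selected from the llc command
+// line, e.g. (illustrative): llc -march=mips -mcpu=allegrex, or
+// llc -march=mips -mattr=+single-float,+o32 for a generic core.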
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
new file mode 100644
index 000000000000..01fe92e6b73c
--- /dev/null
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -0,0 +1,86 @@
+//===- MipsCallingConv.td - Calling Conventions for Mips --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for Mips architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Mips O32 Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Only the return rules are defined here for O32. The rules for argument
+// passing are defined in MipsISelLowering.cpp.
+def RetCC_MipsO32 : CallingConv<[
+ // i32 are returned in registers V0, V1
+ CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+ // f32 are returned in registers F0, F1
+ CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
+
+ // f64 are returned in register D0
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips EABI Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_MipsEABI : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+
+ // Single fp arguments are passed in pairs within 32-bit mode
+ CCIfType<[f32], CCIfSubtarget<"isSingleFloat()",
+ CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>,
+
+ CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()",
+ CCAssignToReg<[F12, F14, F16, F18]>>>,
+
+ // The first 4 double fp arguments are passed in single fp registers.
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()",
+ CCAssignToReg<[D6, D7, D8, D9]>>>,
+
+ // Integer and single fp values get stored in stack slots that are
+ // 4 bytes in size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // f64 values get stored in stack slots that are 8 bytes in
+ // size and 8-byte aligned.
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToStack<8, 8>>>
+]>;
+
+def RetCC_MipsEABI : CallingConv<[
+ // i32 are returned in registers V0, V1
+ CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+
+ // f32 are returned in registers F0, F1
+ CCIfType<[f32], CCAssignToReg<[F0, F1]>>,
+
+ // f64 are returned in register D0
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0]>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Mips Calling Convention Dispatch
+//===----------------------------------------------------------------------===//
+
+def CC_Mips : CallingConv<[
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>
+]>;
+
+def RetCC_Mips : CallingConv<[
+ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
+ CCDelegateTo<RetCC_MipsO32>
+]>;
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
new file mode 100644
index 000000000000..a2b615d8add2
--- /dev/null
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -0,0 +1,77 @@
+//===-- DelaySlotFiller.cpp - Mips delay slot filler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Simple pass to fill delay slots with NOPs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "Mips Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
+bool Filler::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator J = I;
+ ++J;
+ BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP));
+ ++FilledSlots;
+ Changed = true;
+ }
+ return Changed;
+}
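+
+// Illustrative effect of this pass (the assembly is hypothetical): a delayed
+// branch such as
+//     jal  foo
+// becomes
+//     jal  foo
+//     nop            # inserted into the delay slot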
+
+/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Mips MachineFunctions
+FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
+ return new Filler(tm);
+}
+
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
new file mode 100644
index 000000000000..f05ac702ccdd
--- /dev/null
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -0,0 +1,392 @@
+//===-- MipsISelDAGToDAG.cpp - A dag to dag inst selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "Mips.h"
+#include "MipsISelLowering.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class VISIBILITY_HIDDEN MipsDAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to MipsTargetMachine.
+ MipsTargetMachine &TM;
+
+ /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const MipsSubtarget &Subtarget;
+
+public:
+ explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
+
+ virtual void InstructionSelect();
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MipsGenDAGISel.inc"
+
+ SDValue getGlobalBaseReg();
+ SDNode *Select(SDValue N);
+
+ // Complex Pattern.
+ bool SelectAddr(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Offset);
+
+
+ // getI32Imm - Return a target constant with the specified
+ // value, of type i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+
+ #ifndef NDEBUG
+ unsigned Indent;
+ #endif
+};
+
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void MipsDAGToDAGISel::
+InstructionSelect()
+{
+ DEBUG(BB->dump());
+ // Codegen the basic block.
+ #ifndef NDEBUG
+ DOUT << "===== Instruction selection begins:\n";
+ Indent = 0;
+ #endif
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+
+ #ifndef NDEBUG
+ DOUT << "===== Instruction selection ends:\n";
+ #endif
+
+ CurDAG->RemoveDeadNodes();
+}
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+SDValue MipsDAGToDAGISel::getGlobalBaseReg() {
+ MachineFunction* MF = BB->getParent();
+ unsigned GP = 0;
+ for(MachineRegisterInfo::livein_iterator ii = MF->getRegInfo().livein_begin(),
+ ee = MF->getRegInfo().livein_end(); ii != ee; ++ii)
+ if (ii->first == Mips::GP) {
+ GP = ii->second;
+ break;
+ }
+ assert(GP && "GOT PTR not in liveins");
+ // FIXME is there a sensible place to get debug info for this?
+ return CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ DebugLoc::getUnknownLoc(), GP, MVT::i32);
+}
+
+/// ComplexPattern used on MipsInstrInfo
+/// Used on Mips Load/Store instructions
+bool MipsDAGToDAGISel::
+SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base)
+{
+ // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ // In PIC code, load global addresses (GA) through the GOT.
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
+ (Addr.getOpcode() == ISD::TargetJumpTable)){
+ Base = CurDAG->getRegister(Mips::GP, MVT::i32);
+ Offset = Addr;
+ return true;
+ }
+ } else {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (Predicate_immSExt16(CN)) {
+
+ // If the first operand is an FI, get the TargetFI Node.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ } else {
+ Base = Addr.getOperand(0);
+ }
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
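+
+// Rough summary of what SelectAddr produces (addresses are illustrative):
+//   FrameIndex FI               -> Base = TargetFrameIndex(FI), Offset = 0
+//   (add X, sext-16-bit imm)    -> Base = X,                    Offset = imm
+//   GlobalAddress/JumpTable, PIC-> Base = $gp register,         Offset = Addr
+//   anything else               -> Base = Addr,                 Offset = 0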
+
+/// Select instructions that are not custom-lowered. Used for
+/// expanded, promoted and normal instructions.
+SDNode* MipsDAGToDAGISel::
+Select(SDValue N)
+{
+ SDNode *Node = N.getNode();
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Dump information about the Node being selected
+ #ifndef NDEBUG
+ DOUT << std::string(Indent, ' ') << "Selecting: ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent += 2;
+ #endif
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ #ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "== ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+ #endif
+ return NULL;
+ }
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ switch(Opcode) {
+
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2), CmpLHS;
+ unsigned Opc = InFlag.getOpcode(); Opc=Opc;
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0);
+ MOp = Mips::ADDu;
+ } else {
+ CmpLHS = InFlag.getOperand(0);
+ MOp = Mips::SUBu;
+ }
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+
+ MVT VT = LHS.getValueType();
+ SDNode *Carry = CurDAG->getTargetNode(Mips::SLTu, dl, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getTargetNode(Mips::ADDu, dl, VT,
+ SDValue(Carry,0), RHS);
+
+ return CurDAG->SelectNodeTo(N.getNode(), MOp, VT, MVT::Flag,
+ LHS, SDValue(AddCarry,0));
+ }
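+
+  // The ADDE case above expands roughly to the following (register names are
+  // illustrative), for (adde LHS, RHS, glue-of (addc a, b)):
+  //   sltu $c,   a+b, b      # carry out of the low-part add
+  //   addu $t,   $c,  RHS
+  //   addu $res, LHS, $t
+  // The SUBE case is analogous, using SUBu and the borrow from SUBC.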
+
+ /// Mul/Div with two results
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ SDValue Op1 = Node->getOperand(0);
+ SDValue Op2 = Node->getOperand(1);
+
+ unsigned Op;
+ if (Opcode == ISD::UMUL_LOHI || Opcode == ISD::SMUL_LOHI)
+ Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+ else
+ Op = (Opcode == ISD::UDIVREM ? Mips::DIVu : Mips::DIV);
+
+ SDNode *Node = CurDAG->getTargetNode(Op, dl, MVT::Flag, Op1, Op2);
+
+ SDValue InFlag = SDValue(Node, 0);
+ SDNode *Lo = CurDAG->getTargetNode(Mips::MFLO, dl, MVT::i32,
+ MVT::Flag, InFlag);
+ InFlag = SDValue(Lo,1);
+ SDNode *Hi = CurDAG->getTargetNode(Mips::MFHI, dl, MVT::i32, InFlag);
+
+ if (!N.getValue(0).use_empty())
+ ReplaceUses(N.getValue(0), SDValue(Lo,0));
+
+ if (!N.getValue(1).use_empty())
+ ReplaceUses(N.getValue(1), SDValue(Hi,0));
+
+ return NULL;
+ }
+
+ /// Special Muls
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ SDValue MulOp1 = Node->getOperand(0);
+ SDValue MulOp2 = Node->getOperand(1);
+
+ unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ SDNode *MulNode = CurDAG->getTargetNode(MulOp, dl,
+ MVT::Flag, MulOp1, MulOp2);
+
+ SDValue InFlag = SDValue(MulNode, 0);
+
+ if (MulOp == ISD::MUL)
+ return CurDAG->getTargetNode(Mips::MFLO, dl, MVT::i32, InFlag);
+ else
+ return CurDAG->getTargetNode(Mips::MFHI, dl, MVT::i32, InFlag);
+ }
+
+ /// Div/Rem operations
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::SDIV:
+ case ISD::UDIV: {
+ SDValue Op1 = Node->getOperand(0);
+ SDValue Op2 = Node->getOperand(1);
+
+ unsigned Op, MOp;
+ if (Opcode == ISD::SDIV || Opcode == ISD::UDIV) {
+ Op = (Opcode == ISD::SDIV ? Mips::DIV : Mips::DIVu);
+ MOp = Mips::MFLO;
+ } else {
+ Op = (Opcode == ISD::SREM ? Mips::DIV : Mips::DIVu);
+ MOp = Mips::MFHI;
+ }
+ SDNode *Node = CurDAG->getTargetNode(Op, dl, MVT::Flag, Op1, Op2);
+
+ SDValue InFlag = SDValue(Node, 0);
+ return CurDAG->getTargetNode(MOp, dl, MVT::i32, InFlag);
+ }
+
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE: {
+ SDValue Result = getGlobalBaseReg();
+ ReplaceUses(N, Result);
+ return NULL;
+ }
+
+ /// Handle direct and indirect calls when using PIC. With PIC, when the
+ /// GOT is smaller than about 64k (small code) the GA target is
+ /// loaded with only one instruction. Otherwise the GA target must
+ /// be loaded with 3 instructions.
+ case MipsISD::JmpLink: {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ //bool isCodeLarge = (TM.getCodeModel() == CodeModel::Large);
+ SDValue Chain = Node->getOperand(0);
+ SDValue Callee = Node->getOperand(1);
+ SDValue T9Reg = CurDAG->getRegister(Mips::T9, MVT::i32);
+ SDValue InFlag(0, 0);
+
+ if ( (isa<GlobalAddressSDNode>(Callee)) ||
+ (isa<ExternalSymbolSDNode>(Callee)) )
+ {
+ /// Direct call for global addresses and external symbols
+ SDValue GPReg = CurDAG->getRegister(Mips::GP, MVT::i32);
+
+ // Use load to get GOT target
+ SDValue Ops[] = { Callee, GPReg, Chain };
+ SDValue Load = SDValue(CurDAG->getTargetNode(Mips::LW, dl, MVT::i32,
+ MVT::Other, Ops, 3), 0);
+ Chain = Load.getValue(1);
+
+ // Call target must be on T9
+ Chain = CurDAG->getCopyToReg(Chain, dl, T9Reg, Load, InFlag);
+ } else
+ /// Indirect call
+ Chain = CurDAG->getCopyToReg(Chain, dl, T9Reg, Callee, InFlag);
+
+ // Emit Jump and Link Register
+ SDNode *ResNode = CurDAG->getTargetNode(Mips::JALR, dl, MVT::Other,
+ MVT::Flag, T9Reg, Chain);
+ Chain = SDValue(ResNode, 0);
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(Node, 0), Chain);
+ ReplaceUses(SDValue(Node, 1), InFlag);
+ return ResNode;
+ }
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(N);
+
+ #ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ if (ResNode == NULL || ResNode == N.getNode())
+ DEBUG(N.getNode()->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+ #endif
+
+ return ResNode;
+}
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
+ return new MipsDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
new file mode 100644
index 000000000000..9281940019a9
--- /dev/null
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -0,0 +1,1254 @@
+//===-- MipsISelLowering.cpp - Mips DAG Lowering Implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-lower"
+
+#include "MipsISelLowering.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "MipsSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+const char *MipsTargetLowering::
+getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode)
+ {
+ case MipsISD::JmpLink : return "MipsISD::JmpLink";
+ case MipsISD::Hi : return "MipsISD::Hi";
+ case MipsISD::Lo : return "MipsISD::Lo";
+ case MipsISD::GPRel : return "MipsISD::GPRel";
+ case MipsISD::Ret : return "MipsISD::Ret";
+ case MipsISD::CMov : return "MipsISD::CMov";
+ case MipsISD::SelectCC : return "MipsISD::SelectCC";
+ case MipsISD::FPSelectCC : return "MipsISD::FPSelectCC";
+ case MipsISD::FPBrcond : return "MipsISD::FPBrcond";
+ case MipsISD::FPCmp : return "MipsISD::FPCmp";
+ case MipsISD::FPRound : return "MipsISD::FPRound";
+ default : return NULL;
+ }
+}
+
+MipsTargetLowering::
+MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
+{
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+
+ // Mips does not have i1 type, so use i32 for
+ // setcc operations results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // JumpTable targets must use GOT when using PIC_
+ setUsesGlobalOffsetTable(true);
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass);
+ addRegisterClass(MVT::f32, Mips::FGR32RegisterClass);
+
+ // When dealing with single precision only, use libcalls
+ if (!Subtarget->isSingleFloat())
+ if (!Subtarget->isFP64bit())
+ addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
+
+ // Legal fp constants
+ addLegalFPImmediate(APFloat(+0.0f));
+
+ // Load extended operations for i1 types must be promoted.
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // Used by legalize types to correctly generate the setcc result.
+ // Without this, every float setcc comes with an AND/OR of the result;
+ // we don't want this, since the fpcmp result goes to a flag register,
+ // which is used implicitly by brcond and select operations.
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+
+ // Mips Custom Operations
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+ // We custom lower AND/OR to handle the case where the DAG contains 'ands/ors'
+ // with operands coming from setcc fp comparisons. This is necessary since
+ // the results from these setccs are in a flag register (FCR31).
+ setOperationAction(ISD::AND, MVT::i32, Custom);
+ setOperationAction(ISD::OR, MVT::i32, Custom);
+
+ // Operations not directly supported by Mips.
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Use the default for now
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ if (Subtarget->isSingleFloat())
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ if (!Subtarget->hasSEInReg()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ }
+
+ if (!Subtarget->hasBitCount())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ if (!Subtarget->hasSwap())
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+ setStackPointerRegisterToSaveRestore(Mips::SP);
+ computeRegisterProperties();
+}
+
+
+MVT MipsTargetLowering::getSetCCResultType(MVT VT) const {
+ return MVT::i32;
+}
+
+
+SDValue MipsTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG)
+{
+ switch (Op.getOpcode())
+ {
+ case ISD::AND: return LowerANDOR(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::OR: return LowerANDOR(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower helper functions
+//===----------------------------------------------------------------------===//
+
+// AddLiveIn - This helper function adds the specified physical register to the
+// MachineFunction as a live in value. It also creates a corresponding
+// virtual register for it.
+static unsigned
+AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC)
+{
+ assert(RC->contains(PReg) && "Not the correct regclass!");
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+// An address must be loaded from the small section if its size is less than
+// the small section size threshold. Data in this section must be addressed
+// using the gp_rel operator.
+bool MipsTargetLowering::IsInSmallSection(unsigned Size) {
+ return (Size > 0 && (Size <= Subtarget->getSSectionThreshold()));
+}
+
+// Discover if this global address can be placed into small data/bss section.
+bool MipsTargetLowering::IsGlobalInSmallSection(GlobalValue *GV)
+{
+ const TargetData *TD = getTargetData();
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+
+ if (!GVA)
+ return false;
+
+ const Type *Ty = GV->getType()->getElementType();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+
+ // If this is an internal constant string, there is a special
+ // section for it, but not in small data/bss.
+ if (GVA->hasInitializer() && GV->hasLocalLinkage()) {
+ Constant *C = GVA->getInitializer();
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ if (CVA && CVA->isCString())
+ return false;
+ }
+
+ return IsInSmallSection(Size);
+}
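+
+// For example (the threshold value is illustrative): with an 8-byte
+// small-section threshold, an internal i32 global would be placed in
+// .sdata/.sbss and addressed as %gp_rel(sym)($gp), while a 64-byte array
+// would not.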
+
+// Get fp branch code (not opcode) from condition code.
+static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
+ if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
+ return Mips::BRANCH_T;
+
+ if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
+ return Mips::BRANCH_F;
+
+ return Mips::BRANCH_INVALID;
+}
+
+static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) {
+ switch(BC) {
+ default:
+ assert(0 && "Unknown branch code");
+ case Mips::BRANCH_T : return Mips::BC1T;
+ case Mips::BRANCH_F : return Mips::BC1F;
+ case Mips::BRANCH_TL : return Mips::BC1TL;
+ case Mips::BRANCH_FL : return Mips::BC1FL;
+ }
+}
+
+static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return Mips::FCOND_EQ;
+ case ISD::SETUNE: return Mips::FCOND_OGL;
+ case ISD::SETLT:
+ case ISD::SETOLT: return Mips::FCOND_OLT;
+ case ISD::SETGT:
+ case ISD::SETOGT: return Mips::FCOND_OGT;
+ case ISD::SETLE:
+ case ISD::SETOLE: return Mips::FCOND_OLE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return Mips::FCOND_OGE;
+ case ISD::SETULT: return Mips::FCOND_ULT;
+ case ISD::SETULE: return Mips::FCOND_ULE;
+ case ISD::SETUGT: return Mips::FCOND_UGT;
+ case ISD::SETUGE: return Mips::FCOND_UGE;
+ case ISD::SETUO: return Mips::FCOND_UN;
+ case ISD::SETO: return Mips::FCOND_OR;
+ case ISD::SETNE:
+ case ISD::SETONE: return Mips::FCOND_NEQ;
+ case ISD::SETUEQ: return Mips::FCOND_UEQ;
+ }
+}
+
+MachineBasicBlock *
+MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ bool isFPCmp = false;
+ DebugLoc dl = MI->getDebugLoc();
+
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+ case Mips::Select_FCC:
+ case Mips::Select_FCC_S32:
+ case Mips::Select_FCC_D32:
+ isFPCmp = true; // FALL THROUGH
+ case Mips::Select_CC:
+ case Mips::Select_CC_S32:
+ case Mips::Select_CC_D32: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ // Emit the right instruction according to the type of the operands compared
+ if (isFPCmp) {
+      // Find the condition code present in the setcc operation.
+ Mips::CondCode CC = (Mips::CondCode)MI->getOperand(4).getImm();
+ // Get the branch opcode from the branch code.
+ unsigned Opc = FPBranchCodeToOpc(GetFPBranchCodeFromCond(CC));
+ BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
+ } else
+ BuildMI(BB, dl, TII->get(Mips::BNE)).addReg(MI->getOperand(1).getReg())
+ .addReg(Mips::ZERO).addMBB(sinkMBB);
+
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+ e = BB->succ_end(); i != e; ++i)
+ sinkMBB->addSuccessor(*i);
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while(!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MipsTargetLowering::
+LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG)
+{
+ if (!Subtarget->isMips1())
+ return Op;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned CCReg = AddLiveIn(MF, Mips::FCR31, Mips::CCRRegisterClass);
+
+ SDValue Chain = DAG.getEntryNode();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Src = Op.getOperand(0);
+
+ // Set the condition register
+ SDValue CondReg = DAG.getCopyFromReg(Chain, dl, CCReg, MVT::i32);
+ CondReg = DAG.getCopyToReg(Chain, dl, Mips::AT, CondReg);
+ CondReg = DAG.getCopyFromReg(CondReg, dl, Mips::AT, MVT::i32);
+
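+  // (FCR31 | 3) ^ 2 forces the rounding-mode field (the low two bits) to
+  // 0b01, i.e. round toward zero, before the round instruction is emitted.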
+ SDValue Cst = DAG.getConstant(3, MVT::i32);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i32, CondReg, Cst);
+ Cst = DAG.getConstant(2, MVT::i32);
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, MVT::i32, Or, Cst);
+
+ SDValue InFlag(0, 0);
+ CondReg = DAG.getCopyToReg(Chain, dl, Mips::FCR31, Xor, InFlag);
+
+ // Emit the round instruction and bit convert to integer
+ SDValue Trunc = DAG.getNode(MipsISD::FPRound, dl, MVT::f32,
+ Src, CondReg.getValue(1));
+ SDValue BitCvt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Trunc);
+ return BitCvt;
+}
+
+SDValue MipsTargetLowering::
+LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG)
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+  // Get a reference to the Mips stack pointer.
+ SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32);
+
+ // Subtract the dynamic size from the actual stack size to
+ // obtain the new stack size.
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+
+ // The Sub result contains the new stack start address, so it
+ // must be placed in the stack pointer register.
+ Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub);
+
+ // This node always has two return values: a new stack pointer
+ // value and a chain
+ SDValue Ops[2] = { Sub, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue MipsTargetLowering::
+LowerANDOR(SDValue Op, SelectionDAG &DAG)
+{
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (LHS.getOpcode() != MipsISD::FPCmp || RHS.getOpcode() != MipsISD::FPCmp)
+ return Op;
+
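+  // Materialize each FP comparison result as an i32 0/1 value with
+  // FPSelectCC, then combine the two values with the original AND/OR.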
+ SDValue True = DAG.getConstant(1, MVT::i32);
+ SDValue False = DAG.getConstant(0, MVT::i32);
+
+ SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
+ LHS, True, False, LHS.getOperand(2));
+ SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
+ RHS, True, False, RHS.getOperand(2));
+
+ return DAG.getNode(Op.getOpcode(), dl, MVT::i32, LSEL, RSEL);
+}
+
+SDValue MipsTargetLowering::
+LowerBRCOND(SDValue Op, SelectionDAG &DAG)
+{
+ // The first operand is the chain, the second is the condition, the third is
+ // the block to branch to if the condition is true.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Dest = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getOperand(1).getOpcode() != MipsISD::FPCmp)
+ return Op;
+
+ SDValue CondRes = Op.getOperand(1);
+ SDValue CCNode = CondRes.getOperand(2);
+ Mips::CondCode CC =
+ (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
+ SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
+
+ return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
+ Dest, CondRes);
+}
+
+SDValue MipsTargetLowering::
+LowerSETCC(SDValue Op, SelectionDAG &DAG)
+{
+ // The operands to this are the left and right operands to compare (ops #0,
+ // and #1) and the condition code to compare them with (op #2) as a
+ // CondCodeSDNode.
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS,
+ DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+}
+
+SDValue MipsTargetLowering::
+LowerSELECT(SDValue Op, SelectionDAG &DAG)
+{
+ SDValue Cond = Op.getOperand(0);
+ SDValue True = Op.getOperand(1);
+ SDValue False = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+  // If the incoming condition comes from an integer compare, the select
+  // operation must be SelectCC, or a conditional move if the subtarget
+  // supports it.
+ if (Cond.getOpcode() != MipsISD::FPCmp) {
+ if (Subtarget->hasCondMov() && !True.getValueType().isFloatingPoint())
+ return Op;
+ return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(),
+ Cond, True, False);
+ }
+
+  // If the incoming condition comes from an FPCmp, the select
+  // operation must use FPSelectCC.
+ SDValue CCNode = Cond.getOperand(2);
+ return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
+ Cond, True, False, CCNode);
+}
+
+SDValue MipsTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG)
+{
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+
+ if (!Subtarget->hasABICall()) {
+ SDVTList VTs = DAG.getVTList(MVT::i32);
+ SDValue Ops[] = { GA };
+ // %gp_rel relocation
+ if (!isa<Function>(GV) && IsGlobalInSmallSection(GV)) {
+ SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, Ops, 1);
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
+ }
+ // %hi/%lo relocation
+ SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, Ops, 1);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+
+ } else { // Abicall relocations, TODO: make this cleaner.
+ SDValue ResNode = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), GA, NULL, 0);
+    // For functions and globals that are not internally linked, only
+    // a load from the GOT/GP is necessary for PIC to work.
+ if (!GV->hasLocalLinkage() || isa<Function>(GV))
+ return ResNode;
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo);
+ }
+
+ assert(0 && "Dont know how to handle GlobalAddress");
+ return SDValue(0,0);
+}
+
+SDValue MipsTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG)
+{
+ assert(0 && "TLS not implemented for MIPS.");
+ return SDValue(); // Not reached
+}
+
+SDValue MipsTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG)
+{
+ SDValue ResNode;
+ SDValue HiPart;
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ MVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ SDVTList VTs = DAG.getVTList(MVT::i32);
+ SDValue Ops[] = { JTI };
+ HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, Ops, 1);
+ } else // Emit Load from Global Pointer
+ HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0);
+
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+
+ return ResNode;
+}
+
+SDValue MipsTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG)
+{
+ SDValue ResNode;
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ Constant *C = N->getConstVal();
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ // gp_rel relocation
+ // FIXME: we should reference the constant pool using small data sections,
+  // but the asm printer currently doesn't support this feature without
+  // hacking it. Once it does, the code below can be uncommented.
+ //if (!Subtarget->hasABICall() &&
+ // IsInSmallSection(getTargetData()->getTypeAllocSize(C->getType()))) {
+ // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
+ // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+ // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
+ //} else { // %hi/%lo relocation
+ SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
+ ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
+ //}
+
+ return ResNode;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//
+//  The calling convention lowering operations work in this order:
+// LowerCALL (virt regs --> phys regs, virt regs --> stack)
+// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs)
+// LowerRET (virt regs --> phys regs)
+// LowerCALL (phys regs --> virt regs)
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TODO: Implement a generic logic using tblgen that can support this.
+// Mips O32 ABI rules:
+// ---
+// i32 - Passed in A0, A1, A2, A3 and stack
+// f32 - Only passed in f32 registers if no int reg has been used yet to hold
+// an argument. Otherwise, passed in A1, A2, A3 and stack.
+// f64 - Only passed in two aliased f32 registers if no int reg has been used
+// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
+//       not used, it must be shadowed. If only A3 is available, shadow it and
+// go to stack.
+//===----------------------------------------------------------------------===//
+
+static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const unsigned IntRegsSize=4, FloatRegsSize=2;
+
+ static const unsigned IntRegs[] = {
+ Mips::A0, Mips::A1, Mips::A2, Mips::A3
+ };
+ static const unsigned F32Regs[] = {
+ Mips::F12, Mips::F14
+ };
+ static const unsigned F64Regs[] = {
+ Mips::D6, Mips::D7
+ };
+
+ unsigned Reg=0;
+ unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize);
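+  // An integer register has already been used iff the first unallocated
+  // integer register is no longer A0.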
+ bool IntRegUsed = (IntRegs[UnallocIntReg] != (unsigned (Mips::A0)));
+
+ // Promote i8 and i16
+ if (LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ if (ValVT == MVT::i32 || (ValVT == MVT::f32 && IntRegUsed)) {
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ IntRegUsed = true;
+ LocVT = MVT::i32;
+ }
+
+ if (ValVT.isFloatingPoint() && !IntRegUsed) {
+ if (ValVT == MVT::f32)
+ Reg = State.AllocateReg(F32Regs, FloatRegsSize);
+ else
+ Reg = State.AllocateReg(F64Regs, FloatRegsSize);
+ }
+
+ if (ValVT == MVT::f64 && IntRegUsed) {
+ if (UnallocIntReg != IntRegsSize) {
+      // If the first unallocated register is A3, we must
+      // mark it as allocated (shadowed) and use the stack instead.
+ if (IntRegs[UnallocIntReg] != (unsigned (Mips::A3)))
+ Reg = Mips::A2;
+ for (;UnallocIntReg < IntRegsSize; ++UnallocIntReg)
+ State.AllocateReg(UnallocIntReg);
+ }
+ LocVT = MVT::i32;
+ }
+
+ if (!Reg) {
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ } else
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+
+ return false; // CC must always match
+}
+
+//===----------------------------------------------------------------------===//
+// CALL Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerCALL - function arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: isVarArg, isTailCall.
+SDValue MipsTargetLowering::
+LowerCALL(SDValue Op, SelectionDAG &DAG)
+{
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ bool isVarArg = TheCall->isVarArg();
+ unsigned CC = TheCall->getCallingConv();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+ // To meet O32 ABI, Mips must always allocate 16 bytes on
+  // the stack (even if fewer than 4 argument registers are used).
+ if (Subtarget->isABI_O32()) {
+ int VTsize = MVT(MVT::i32).getSizeInBits()/8;
+ MFI->CreateFixedObject(VTsize, (VTsize*3));
+ CCInfo.AnalyzeCallOperands(TheCall, CC_MipsO32);
+ } else
+ CCInfo.AnalyzeCallOperands(TheCall, CC_Mips);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+  // With EABI it is possible to have 16 args in registers.
+ SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // First/LastArgStackLoc contains the first/last
+ // "at stack" argument location.
+ int LastArgStackLoc = 0;
+ unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16);
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ SDValue Arg = TheCall->getArg(i);
+ CCValAssign &VA = ArgLocs[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full:
+ if (Subtarget->isABI_O32() && VA.isRegLoc()) {
+ if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Arg);
+ if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {
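+          // An f64 passed in integer registers is bitcast to i64 and split
+          // into its low and high 32-bit halves for the register pair.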
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
+ DAG.getConstant(0, getPointerTy()));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
+ DAG.getConstant(1, getPointerTy()));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi));
+ continue;
+ }
+ }
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+    // Arguments that can be passed in a register are kept in the
+    // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+    // Register arguments never reach this point...
+ assert(VA.isMemLoc());
+
+    // Create the frame index object for this incoming parameter.
+    // This guarantees that when allocating the Local Area, the first
+    // 16 bytes (which are always reserved under the O32 ABI) won't be
+    // overwritten. For EABI the first address is zero.
+ LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ LastArgStackLoc);
+
+ SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+    // Emit an ISD::STORE which stores the
+    // parameter value to a stack location.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ }
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+
+ // MipsJmpLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Create a stack location to hold GP when PIC is used. This stack
+  // location is used in the function prologue to save GP and also after all
+  // emitted CALLs to restore GP.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ // Function can have an arbitrary number of calls, so
+ // hold the LastArgStackLoc with the biggest offset.
+ int FI;
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ if (LastArgStackLoc >= MipsFI->getGPStackOffset()) {
+ LastArgStackLoc = (!LastArgStackLoc) ? (16) : (LastArgStackLoc+4);
+ // Create the frame index only once. SPOffset here can be anything
+ // (this will be fixed on processFunctionBeforeFrameFinalized)
+ if (MipsFI->getGPStackOffset() == -1) {
+ FI = MFI->CreateFixedObject(4, 0);
+ MipsFI->setGPFI(FI);
+ }
+ MipsFI->setGPStackOffset(LastArgStackLoc);
+ }
+
+ // Reload GP value.
+ FI = MipsFI->getGPFI();
+ SDValue FIN = DAG.getFrameIndex(FI,getPointerTy());
+ SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0);
+ Chain = GPLoad.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32),
+ GPLoad, SDValue(0,0));
+ InFlag = Chain.getValue(1);
+ }
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG), Op.getResNo());
+}
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. Returns a SDNode with the same number of values as the
+/// ISD::CALL.
+SDNode *MipsTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG) {
+
+ bool isVarArg = TheCall->isVarArg();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_Mips);
+ SmallVector<SDValue, 8> ResultVals;
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ ResultVals.push_back(Chain.getValue(0));
+ }
+
+ ResultVals.push_back(Chain);
+
+ // Merge everything together with a MERGE_VALUES node.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size()).getNode();
+}
+
+//===----------------------------------------------------------------------===//
+// FORMAL_ARGUMENTS Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerFORMAL_ARGUMENTS - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.
+/// TODO: isVarArg
+SDValue MipsTargetLowering::
+LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
+{
+ SDValue Root = Op.getOperand(0);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ DebugLoc dl = Op.getDebugLoc();
+
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+
+ unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
+
+ // GP must be live into PIC and non-PIC call target.
+ AddLiveIn(MF, Mips::GP, Mips::CPURegsRegisterClass);
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+ if (Subtarget->isABI_O32())
+ CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_MipsO32);
+ else
+ CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_Mips);
+
+ SmallVector<SDValue, 16> ArgValues;
+ SDValue StackPtr;
+
+ unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored on registers
+ if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC = 0;
+
+ if (RegVT == MVT::i32)
+ RC = Mips::CPURegsRegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = Mips::FGR32RegisterClass;
+ else if (RegVT == MVT::f64) {
+ if (!Subtarget->isSingleFloat())
+ RC = Mips::AFGR64RegisterClass;
+ } else
+ assert(0 && "RegVT not supported by FORMAL_ARGUMENTS Lowering");
+
+ // Transform the arguments stored on
+ // physical registers into virtual ones
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ unsigned Opcode = 0;
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Opcode = ISD::AssertSext;
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Opcode = ISD::AssertZext;
+ if (Opcode)
+ ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+
+ // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64
+ if (Subtarget->isABI_O32()) {
+ if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32)
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue);
+ if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) {
+ unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
+ VA.getLocReg()+1, RC);
+ SDValue ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg2, RegVT);
+ SDValue Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue);
+ SDValue Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue2);
+ ArgValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::f64, Lo, Hi);
+ }
+ }
+
+ ArgValues.push_back(ArgValue);
+
+      // To meet the ABI, when VARARGS are passed in registers, the registers
+ // must have their values written to the caller stack frame.
+ if ((isVarArg) && (Subtarget->isABI_O32())) {
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getRegister(StackReg, getPointerTy());
+
+ // The stack pointer offset is relative to the caller stack frame.
+ // Since the real stack size is unknown here, a negative SPOffset
+ // is used so there's a way to adjust these offsets when the stack
+      // size gets known (on EliminateFrameIndex). A dummy SPOffset is
+      // used instead of a direct negative address (which is recorded to
+      // be used on emitPrologue) to avoid miscalculation of the first stack
+ // offset on PEI::calculateFrameObjectOffsets.
+ // Arguments are always 32-bit.
+ int FI = MFI->CreateFixedObject(4, 0);
+ MipsFI->recordStoreVarArgsFI(FI, -(4+(i*4)));
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+
+      // Emit an ISD::STORE which stores the
+      // parameter value to a stack location.
+ ArgValues.push_back(DAG.getStore(Root, dl, ArgValue, PtrOff, NULL, 0));
+ }
+
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+
+ // The stack pointer offset is relative to the caller stack frame.
+ // Since the real stack size is unknown here, a negative SPOffset
+ // is used so there's a way to adjust these offsets when the stack
+    // size gets known (on EliminateFrameIndex). A dummy SPOffset is
+    // used instead of a direct negative address (which is recorded to
+    // be used on emitPrologue) to avoid miscalculation of the first stack
+ // offset on PEI::calculateFrameObjectOffsets.
+ // Arguments are always 32-bit.
+ unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ int FI = MFI->CreateFixedObject(ArgSize, 0);
+ MipsFI->recordLoadArgsFI(FI, -(ArgSize+
+ (FirstStackArgLoc + VA.getLocMemOffset())));
+
+ // Create load nodes to retrieve arguments from the stack
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0));
+ }
+ }
+
+  // The Mips ABIs for returning structs by value require that we copy
+ // the sret argument into $v0 for the return. Save the argument into
+ // a virtual register so that we can access it from the return points.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ unsigned Reg = MipsFI->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i32));
+ MipsFI->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]);
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root);
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MipsTargetLowering::
+LowerRET(SDValue Op, SelectionDAG &DAG)
+{
+ // CCValAssign - represent the assignment of
+ // the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+
+  // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_Mips);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ // The chain is always operand #0
+ SDValue Chain = Op.getOperand(0);
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // ISD::RET => ret chain, (regnum1,val1), ...
+    // So i*2+1 indexes only the regnums
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Op.getOperand(i*2+1), Flag);
+
+    // Guarantee that all emitted copies are
+    // stuck together, so they are not scheduled apart.
+ Flag = Chain.getValue(1);
+ }
+
+  // The Mips ABIs for returning structs by value require that we copy
+ // the sret argument into $v0 for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into $v0.
+ if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ unsigned Reg = MipsFI->getSRetReturnReg();
+
+ if (!Reg)
+ assert(0 && "sret virtual register not created in the entry block");
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+
+ Chain = DAG.getCopyToReg(Chain, dl, Mips::V0, Val, Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ // Return on Mips is always a "jr $ra"
+ if (Flag.getNode())
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(Mips::RA, MVT::i32), Flag);
+ else // Return Void
+ return DAG.getNode(MipsISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(Mips::RA, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// Mips Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+MipsTargetLowering::ConstraintType MipsTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+  // Mips specific constraints
+ // GCC config/mips/constraints.md
+ //
+ // 'd' : An address register. Equivalent to r
+ // unless generating MIPS16 code.
+ // 'y' : Equivalent to r; retained for
+ // backwards compatibility.
+ // 'f' : Floating Point registers.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'd':
+ case 'y':
+ case 'f':
+ return C_RegisterClass;
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// getRegForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
+/// return a register number and register class that can be used to satisfy
+/// the constraint. This should only be used for C_RegisterClass constraints.
+std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
+{
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, Mips::CPURegsRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, Mips::FGR32RegisterClass);
+ if (VT == MVT::f64)
+ if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
+ return std::make_pair(0U, Mips::AFGR64RegisterClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// Given a register class constraint, like 'r', return a list of registers
+/// that can be used to satisfy the constraint.
+std::vector<unsigned> MipsTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const
+{
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) {
+ default : break;
+ case 'r':
+ // GCC Mips Constraint Letters
+ case 'd':
+ case 'y':
+ return make_vector<unsigned>(Mips::T0, Mips::T1, Mips::T2, Mips::T3,
+ Mips::T4, Mips::T5, Mips::T6, Mips::T7, Mips::S0, Mips::S1,
+ Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7,
+ Mips::T8, 0);
+
+ case 'f':
+ if (VT == MVT::f32) {
+ if (Subtarget->isSingleFloat())
+ return make_vector<unsigned>(Mips::F2, Mips::F3, Mips::F4, Mips::F5,
+ Mips::F6, Mips::F7, Mips::F8, Mips::F9, Mips::F10, Mips::F11,
+ Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24,
+ Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29,
+ Mips::F30, Mips::F31, 0);
+ else
+ return make_vector<unsigned>(Mips::F2, Mips::F4, Mips::F6, Mips::F8,
+ Mips::F10, Mips::F20, Mips::F22, Mips::F24, Mips::F26,
+ Mips::F28, Mips::F30, 0);
+ }
+
+ if (VT == MVT::f64)
+ if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
+ return make_vector<unsigned>(Mips::D1, Mips::D2, Mips::D3, Mips::D4,
+ Mips::D5, Mips::D10, Mips::D11, Mips::D12, Mips::D13,
+ Mips::D14, Mips::D15, 0);
+ }
+ return std::vector<unsigned>();
+}
+
+bool
+MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Mips target isn't yet aware of offsets.
+ return false;
+}
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
new file mode 100644
index 000000000000..55cd6eadd096
--- /dev/null
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -0,0 +1,130 @@
+//===-- MipsISelLowering.h - Mips DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Mips uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsISELLOWERING_H
+#define MipsISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "Mips.h"
+#include "MipsSubtarget.h"
+
+namespace llvm {
+ namespace MipsISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Jump and link (call)
+ JmpLink,
+
+ // Get the Higher 16 bits from a 32-bit immediate
+ // No relation with Mips Hi register
+ Hi,
+
+ // Get the Lower 16 bits from a 32-bit immediate
+ // No relation with Mips Lo register
+ Lo,
+
+ // Handle gp_rel (small data/bss sections) relocation.
+ GPRel,
+
+ // Conditional Move
+ CMov,
+
+ // Select CC Pseudo Instruction
+ SelectCC,
+
+ // Floating Point Select CC Pseudo Instruction
+ FPSelectCC,
+
+ // Floating Point Branch Conditional
+ FPBrcond,
+
+ // Floating Point Compare
+ FPCmp,
+
+ // Floating Point Rounding
+ FPRound,
+
+ // Return
+ Ret
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+ class MipsTargetLowering : public TargetLowering
+ {
+ // FrameIndex for return slot.
+ int ReturnAddrIndex;
+ public:
+
+ explicit MipsTargetLowering(MipsTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// getTargetNodeName - This method returns the name of a target specific
+    /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - get the ISD::SETCC result ValueType
+ MVT getSetCCResultType(MVT VT) const;
+
+ private:
+ // Subtarget Info
+ const MipsSubtarget *Subtarget;
+
+ // Lower Operand helpers
+ SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG);
+ bool IsGlobalInSmallSection(GlobalValue *GV);
+ bool IsInSmallSection(unsigned Size);
+
+ // Lower Operand specifics
+ SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
+
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ // Inline asm support
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ };
+}
+
+#endif // MipsISELLOWERING_H
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
new file mode 100644
index 000000000000..b6a6d2f5c052
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -0,0 +1,304 @@
+//===- MipsInstrFPU.td - Mips FPU Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Mips FPU instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Floating Point Instructions
+// ------------------------
+// * 64bit fp:
+// - 32 64-bit registers (default mode)
+// - 16 even 32-bit registers (32-bit compatible mode) for
+// single and double access.
+// * 32bit fp:
+// - 16 even 32-bit registers - single and double (aliased)
+// - 32 32-bit registers (within single-only mode)
+//===----------------------------------------------------------------------===//
+
+// Floating Point Compare and Branch
+def SDT_MipsFPBrcond : SDTypeProfile<0, 3, [SDTCisSameAs<0, 2>, SDTCisInt<0>,
+ SDTCisVT<1, OtherVT>]>;
+def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<0>,
+ SDTCisInt<2>]>;
+def SDT_MipsFPSelectCC : SDTypeProfile<1, 4, [SDTCisInt<1>, SDTCisInt<4>,
+ SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
+
+def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInFlag]>;
+def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,
+ [SDNPHasChain]>;
+def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp>;
+def MipsFPSelectCC : SDNode<"MipsISD::FPSelectCC", SDT_MipsFPSelectCC>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printFCCOperand" in
+ def condcode : Operand<i32>;
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+def In32BitMode : Predicate<"!Subtarget.isFP64bit()">;
+def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">;
+def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//
+// A set of multiclasses is used to address the register usage.
+//
+// S32 - single precision in 16 32bit even fp registers
+// single precision in 32 32bit fp registers in SingleOnly mode
+// S64 - single precision in 32 64bit fp registers (In64BitMode)
+// D32 - double precision in 16 32bit even fp registers
+// D64 - double precision in 32 64bit fp registers (In64BitMode)
+//
+// Only S32 and D32 are supported right now.
+//===----------------------------------------------------------------------===//
+
+multiclass FFR1_1<bits<6> funct, string asmstr>
+{
+ def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
+ !strconcat(asmstr, ".s $fd, $fs"), []>;
+
+ def _D32 : FFR<0x11, funct, 0x1, (outs FGR32:$fd), (ins AFGR64:$fs),
+ !strconcat(asmstr, ".d $fd, $fs"), []>, Requires<[In32BitMode]>;
+}
+
+multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp>
+{
+ def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
+ !strconcat(asmstr, ".s $fd, $fs"),
+ [(set FGR32:$fd, (FOp FGR32:$fs))]>;
+
+ def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
+ !strconcat(asmstr, ".d $fd, $fs"),
+ [(set AFGR64:$fd, (FOp AFGR64:$fs))]>, Requires<[In32BitMode]>;
+}
+
+class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc,
+ RegisterClass RcDst, string asmstr>:
+ FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs),
+ !strconcat(asmstr, " $fd, $fs"), []>;
+
+
+multiclass FFR1_4<bits<6> funct, string asmstr, SDNode FOp> {
+ def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd),
+ (ins FGR32:$fs, FGR32:$ft),
+ !strconcat(asmstr, ".s $fd, $fs, $ft"),
+ [(set FGR32:$fd, (FOp FGR32:$fs, FGR32:$ft))]>;
+
+ def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd),
+ (ins AFGR64:$fs, AFGR64:$ft),
+ !strconcat(asmstr, ".d $fd, $fs, $ft"),
+ [(set AFGR64:$fd, (FOp AFGR64:$fs, AFGR64:$ft))]>,
+ Requires<[In32BitMode]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Instructions
+//===----------------------------------------------------------------------===//
+
+let ft = 0 in {
+ defm FLOOR_W : FFR1_1<0b001111, "floor.w">;
+ defm CEIL_W : FFR1_1<0b001110, "ceil.w">;
+ defm ROUND_W : FFR1_1<0b001100, "round.w">;
+ defm TRUNC_W : FFR1_1<0b001101, "trunc.w">;
+ defm CVTW : FFR1_1<0b100100, "cvt.w">;
+ defm FMOV : FFR1_1<0b000110, "mov">;
+
+ defm FABS : FFR1_2<0b000101, "abs", fabs>;
+ defm FNEG : FFR1_2<0b000111, "neg", fneg>;
+ defm FSQRT : FFR1_2<0b000100, "sqrt", fsqrt>;
+
+  /// Convert to Single Precision
+ def CVTS_W32 : FFR1_3<0b100000, 0x2, FGR32, FGR32, "cvt.s.w">;
+
+ let Predicates = [IsNotSingleFloat] in {
+ /// Ceil to long signed integer
+ def CEIL_LS : FFR1_3<0b001010, 0x0, FGR32, FGR32, "ceil.l">;
+ def CEIL_LD : FFR1_3<0b001010, 0x1, AFGR64, AFGR64, "ceil.l">;
+
+ /// Round to long signed integer
+ def ROUND_LS : FFR1_3<0b001000, 0x0, FGR32, FGR32, "round.l">;
+ def ROUND_LD : FFR1_3<0b001000, 0x1, AFGR64, AFGR64, "round.l">;
+
+ /// Floor to long signed integer
+ def FLOOR_LS : FFR1_3<0b001011, 0x0, FGR32, FGR32, "floor.l">;
+ def FLOOR_LD : FFR1_3<0b001011, 0x1, AFGR64, AFGR64, "floor.l">;
+
+ /// Trunc to long signed integer
+ def TRUNC_LS : FFR1_3<0b001001, 0x0, FGR32, FGR32, "trunc.l">;
+ def TRUNC_LD : FFR1_3<0b001001, 0x1, AFGR64, AFGR64, "trunc.l">;
+
+ /// Convert to long signed integer
+ def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">;
+ def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">;
+
+    /// Convert to Double Precision
+ def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">;
+ def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">;
+ def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">;
+
+    /// Convert to Single Precision
+ def CVTS_D32 : FFR1_3<0b100000, 0x1, FGR32, AFGR64, "cvt.s.d">;
+ def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">;
+ }
+}
+
+// The odd-numbered registers are only referenced when doing loads,
+// stores, and moves between floating-point and integer registers.
+// When defining instructions, we reference all 32-bit registers,
+// regardless of register aliasing.
+let fd = 0 in {
+ /// Move Control Registers From/To CPU Registers
+ def CFC1 : FFR<0x11, 0x0, 0x2, (outs CPURegs:$rt), (ins CCR:$fs),
+ "cfc1 $rt, $fs", []>;
+
+ def CTC1 : FFR<0x11, 0x0, 0x6, (outs CCR:$rt), (ins CPURegs:$fs),
+ "ctc1 $fs, $rt", []>;
+
+ def MFC1 : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs),
+ "mfc1 $rt, $fs", []>;
+
+ def MTC1 : FFR<0x11, 0x00, 0x04, (outs FGR32:$fs), (ins CPURegs:$rt),
+ "mtc1 $rt, $fs", []>;
+}
+
+/// Floating Point Memory Instructions
+let Predicates = [IsNotSingleFloat] in {
+ def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr),
+ "ldc1 $ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>;
+
+ def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr),
+ "sdc1 $ft, $addr", [(store AFGR64:$ft, addr:$addr)]>;
+}
+
+// LWC1 and SWC1 can always be emitted with odd registers.
+def LWC1 : FFI<0b110001, (outs FGR32:$ft), (ins mem:$addr), "lwc1 $ft, $addr",
+ [(set FGR32:$ft, (load addr:$addr))]>;
+def SWC1 : FFI<0b111001, (outs), (ins FGR32:$ft, mem:$addr), "swc1 $ft, $addr",
+ [(store FGR32:$ft, addr:$addr)]>;
+
+/// Floating-point Arithmetic
+defm FADD : FFR1_4<0x10, "add", fadd>;
+defm FDIV : FFR1_4<0x03, "div", fdiv>;
+defm FMUL : FFR1_4<0x02, "mul", fmul>;
+defm FSUB : FFR1_4<0x01, "sub", fsub>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Branch Codes
+//===----------------------------------------------------------------------===//
+// Mips branch codes. These correspond to condcode in MipsInstrInfo.h.
+// They must be kept in sync.
+def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
+def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
+def MIPS_BRANCH_FL : PatLeaf<(i32 2)>;
+def MIPS_BRANCH_TL : PatLeaf<(i32 3)>;
+
+/// Floating Point Branch of False/True (Likely)
+let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in {
+ class FBRANCH<PatLeaf op, string asmstr> : FFI<0x11, (outs),
+ (ins brtarget:$dst), !strconcat(asmstr, " $dst"),
+ [(MipsFPBrcond op, bb:$dst, FCR31)]>;
+}
+def BC1F : FBRANCH<MIPS_BRANCH_F, "bc1f">;
+def BC1T : FBRANCH<MIPS_BRANCH_T, "bc1t">;
+def BC1FL : FBRANCH<MIPS_BRANCH_FL, "bc1fl">;
+def BC1TL : FBRANCH<MIPS_BRANCH_TL, "bc1tl">;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Flag Conditions
+//===----------------------------------------------------------------------===//
+// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h.
+// They must be kept in sync.
+def MIPS_FCOND_F : PatLeaf<(i32 0)>;
+def MIPS_FCOND_UN : PatLeaf<(i32 1)>;
+def MIPS_FCOND_EQ : PatLeaf<(i32 2)>;
+def MIPS_FCOND_UEQ : PatLeaf<(i32 3)>;
+def MIPS_FCOND_OLT : PatLeaf<(i32 4)>;
+def MIPS_FCOND_ULT : PatLeaf<(i32 5)>;
+def MIPS_FCOND_OLE : PatLeaf<(i32 6)>;
+def MIPS_FCOND_ULE : PatLeaf<(i32 7)>;
+def MIPS_FCOND_SF : PatLeaf<(i32 8)>;
+def MIPS_FCOND_NGLE : PatLeaf<(i32 9)>;
+def MIPS_FCOND_SEQ : PatLeaf<(i32 10)>;
+def MIPS_FCOND_NGL : PatLeaf<(i32 11)>;
+def MIPS_FCOND_LT : PatLeaf<(i32 12)>;
+def MIPS_FCOND_NGE : PatLeaf<(i32 13)>;
+def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
+def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
+
+/// Floating Point Compare
+let hasDelaySlot = 1, Defs=[FCR31] in {
+ def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
+ "c.$cc.s $fs, $ft", [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc),
+ (implicit FCR31)]>;
+
+ def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
+ "c.$cc.d $fs, $ft", [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc),
+ (implicit FCR31)]>, Requires<[In32BitMode]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Pseudo-Instructions
+//===----------------------------------------------------------------------===//
+
+// For an explanation, see Select_CC in MipsInstrInfo.td. We also embed a
+// condition code to enable easy handling by the Custom Inserter.
+let usesCustomDAGSchedInserter = 1, Uses=[FCR31] in {
+ class PseudoFPSelCC<RegisterClass RC, string asmstr> :
+ MipsPseudo<(outs RC:$dst),
+ (ins CPURegs:$CmpRes, RC:$T, RC:$F, condcode:$cc), asmstr,
+ [(set RC:$dst, (MipsFPSelectCC CPURegs:$CmpRes, RC:$T, RC:$F,
+ imm:$cc))]>;
+}
+
+// The values to be selected are fp but the condition test is with integers.
+def Select_CC_S32 : PseudoSelCC<FGR32, "# MipsSelect_CC_S32_f32">;
+def Select_CC_D32 : PseudoSelCC<AFGR64, "# MipsSelect_CC_D32_f32">,
+ Requires<[In32BitMode]>;
+
+// The values to be selected are int but the condition test is done with fp.
+def Select_FCC : PseudoFPSelCC<CPURegs, "# MipsSelect_FCC">;
+
+// The values to be selected and the condition test are both done with fp.
+def Select_FCC_S32 : PseudoFPSelCC<FGR32, "# MipsSelect_FCC_S32_f32">;
+def Select_FCC_D32 : PseudoFPSelCC<AFGR64, "# MipsSelect_FCC_D32_f32">,
+ Requires<[In32BitMode]>;
+
+def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src),
+ "# MOVCCRToCCR", []>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Patterns
+//===----------------------------------------------------------------------===//
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
+
+def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVTS_W32 (MTC1 CPURegs:$src))>;
+def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVTD_W32 (MTC1 CPURegs:$src))>;
+
+def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S32 FGR32:$src))>;
+
+def : Pat<(i32 (bitconvert FGR32:$src)), (MFC1 FGR32:$src)>;
+def : Pat<(f32 (bitconvert CPURegs:$src)), (MTC1 CPURegs:$src)>;
+
+let Predicates = [In32BitMode] in {
+ def : Pat<(f32 (fround AFGR64:$src)), (CVTS_D32 AFGR64:$src)>;
+ def : Pat<(f64 (fextend FGR32:$src)), (CVTD_S32 FGR32:$src)>;
+}
+
+// MipsFPRound is only emitted for MipsI targets.
+def : Pat<(f32 (MipsFPRound AFGR64:$src)), (CVTW_D32 AFGR64:$src)>;
+
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
new file mode 100644
index 000000000000..0853272f7280
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -0,0 +1,182 @@
+//===- MipsInstrFormats.td - Mips Instruction Formats -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MIPS instructions format
+//
+// CPU INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// rs - src reg.
+// rt - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr).
+// rd - dst reg, only used on 3 regs instr.
+// shamt - only used on shift instructions, contains the shift amount.
+//  funct  - combined with the opcode field, gives us the operation code.
+//
+//===----------------------------------------------------------------------===//
+
+// Generic Mips Format
+class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: Instruction
+{
+ field bits<32> Inst;
+
+ let Namespace = "Mips";
+
+ bits<6> opcode;
+
+  // Top 6 bits are the 'opcode' field
+ let Inst{31-26} = opcode;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+}
+
+// Mips Pseudo Instructions Format
+class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsInst<outs, ins, asmstr, pattern, IIPseudo>;
+
+//===----------------------------------------------------------------------===//
+// Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|>
+//===----------------------------------------------------------------------===//
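+// For example, "addu $rd, $rs, $rt" is an R-format instruction encoded with
+// opcode 0 (SPECIAL), funct 0x21, and shamt 0.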
+
+class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin>:
+ MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> shamt;
+ bits<6> funct;
+
+ let opcode = op;
+ let funct = _funct;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = shamt;
+ let Inst{5-0} = funct;
+}
+
+//===----------------------------------------------------------------------===//
+// Format I instruction class in Mips : <|opcode|rs|rt|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Format J instruction class in Mips : <|opcode|address|>
+//===----------------------------------------------------------------------===//
+
+class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<26> addr;
+
+ let opcode = op;
+
+ let Inst{25-0} = addr;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// FLOATING POINT INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// fs - src reg.
+// ft - dst reg (on a 2 regs instr) or src reg (on a 3 reg instr).
+// fd - dst reg, only used on 3 regs instr.
+// fmt - double or single precision.
+//  funct  - combined with the opcode field, gives us the operation code.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|>
+//===----------------------------------------------------------------------===//
+
+class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
+ string asmstr, list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> ft;
+ bits<5> fmt;
+ bits<6> funct;
+
+ let opcode = op;
+ let funct = _funct;
+ let fmt = _fmt;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
+
+//===----------------------------------------------------------------------===//
+// Format FI instruction class in Mips : <|opcode|base|ft|immediate|>
+//===----------------------------------------------------------------------===//
+
+class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> ft;
+ bits<5> base;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{25-21} = base;
+ let Inst{20-16} = ft;
+ let Inst{15-0} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|>
+//===----------------------------------------------------------------------===//
+
+class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fs;
+ bits<5> ft;
+ bits<4> cc;
+ bits<5> fmt;
+
+ let opcode = 0x11;
+ let fmt = _fmt;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = 0;
+ let Inst{5-4} = 0b11;
+ let Inst{3-0} = cc;
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
new file mode 100644
index 000000000000..6225fa9c9884
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -0,0 +1,623 @@
+//===- MipsInstrInfo.cpp - Mips Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsInstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "MipsGenInstrInfo.inc"
+
+using namespace llvm;
+
+MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
+ : TargetInstrInfoImpl(MipsInsts, array_lengthof(MipsInsts)),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// Return true if the instruction is a register to register move and
+/// leave the source and dest operands in the passed parameters.
+bool MipsInstrInfo::
+isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const
+{
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+ // addu $dst, $src, $zero || addu $dst, $zero, $src
+ // or $dst, $src, $zero || or $dst, $zero, $src
+ if ((MI.getOpcode() == Mips::ADDu) || (MI.getOpcode() == Mips::OR)) {
+ if (MI.getOperand(1).getReg() == Mips::ZERO) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ return true;
+ } else if (MI.getOperand(2).getReg() == Mips::ZERO) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ }
+
+ // mov $fpDst, $fpSrc
+ // mfc $gpDst, $fpSrc
+ // mtc $fpDst, $gpSrc
+ if (MI.getOpcode() == Mips::FMOV_S32 ||
+ MI.getOpcode() == Mips::FMOV_D32 ||
+ MI.getOpcode() == Mips::MFC1 ||
+ MI.getOpcode() == Mips::MTC1 ||
+ MI.getOpcode() == Mips::MOVCCRToCCR) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+
+ // addiu $dst, $src, 0
+ if (MI.getOpcode() == Mips::ADDiu) {
+ if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MipsInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) ||
+ (MI->getOpcode() == Mips::LDC1)) {
+ if ((MI->getOperand(2).isFI()) && // is a stack slot
+ (MI->getOperand(1).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(1)))) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MipsInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+ if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) ||
+ (MI->getOpcode() == Mips::SDC1)) {
+ if ((MI->getOperand(2).isFI()) && // is a stack slot
+ (MI->getOperand(1).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(1)))) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// insertNoop - If a data hazard condition is found, insert the target nop
+/// instruction.
+void MipsInstrInfo::
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
+{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ BuildMI(MBB, MI, DL, get(Mips::NOP));
+}
+
+bool MipsInstrInfo::
+copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (DestRC != SrcRC) {
+
+ // Copy to/from FCR31 condition register
+ if ((DestRC == Mips::CPURegsRegisterClass) &&
+ (SrcRC == Mips::CCRRegisterClass))
+ BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg).addReg(SrcReg);
+ else if ((DestRC == Mips::CCRRegisterClass) &&
+ (SrcRC == Mips::CPURegsRegisterClass))
+ BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg).addReg(SrcReg);
+
+ // Moves between coprocessors and cpu
+ else if ((DestRC == Mips::CPURegsRegisterClass) &&
+ (SrcRC == Mips::FGR32RegisterClass))
+ BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg).addReg(SrcReg);
+ else if ((DestRC == Mips::FGR32RegisterClass) &&
+ (SrcRC == Mips::CPURegsRegisterClass))
+ BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg).addReg(SrcReg);
+
+ // Move from/to Hi/Lo registers
+ else if ((DestRC == Mips::HILORegisterClass) &&
+ (SrcRC == Mips::CPURegsRegisterClass)) {
+ unsigned Opc = (DestReg == Mips::HI) ? Mips::MTHI : Mips::MTLO;
+ BuildMI(MBB, I, DL, get(Opc), DestReg);
+ } else if ((SrcRC == Mips::HILORegisterClass) &&
+ (DestRC == Mips::CPURegsRegisterClass)) {
+ unsigned Opc = (SrcReg == Mips::HI) ? Mips::MFHI : Mips::MFLO;
+ BuildMI(MBB, I, DL, get(Opc), DestReg);
+
+ // Can't copy this register
+ } else
+ return false;
+
+ return true;
+ }
+
+ if (DestRC == Mips::CPURegsRegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
+ .addReg(SrcReg);
+ else if (DestRC == Mips::FGR32RegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg).addReg(SrcReg);
+ else if (DestRC == Mips::AFGR64RegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg).addReg(SrcReg);
+ else if (DestRC == Mips::CCRRegisterClass)
+ BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg).addReg(SrcReg);
+ else
+ // Can't copy this register
+ return false;
+
+ return true;
+}
+
+void MipsInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC) const {
+ unsigned Opc;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == Mips::CPURegsRegisterClass)
+ Opc = Mips::SW;
+ else if (RC == Mips::FGR32RegisterClass)
+ Opc = Mips::SWC1;
+ else {
+ assert(RC == Mips::AFGR64RegisterClass);
+ Opc = Mips::SDC1;
+ }
+
+ BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI);
+}
+
+void MipsInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill, SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+ unsigned Opc;
+ if (RC == Mips::CPURegsRegisterClass)
+ Opc = Mips::SW;
+ else if (RC == Mips::FGR32RegisterClass)
+ Opc = Mips::SWC1;
+ else {
+ assert(RC == Mips::AFGR64RegisterClass);
+ Opc = Mips::SDC1;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc))
+ .addReg(SrcReg, getKillRegState(isKill));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+void MipsInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const
+{
+ unsigned Opc;
+ if (RC == Mips::CPURegsRegisterClass)
+ Opc = Mips::LW;
+ else if (RC == Mips::FGR32RegisterClass)
+ Opc = Mips::LWC1;
+ else {
+ assert(RC == Mips::AFGR64RegisterClass);
+ Opc = Mips::LDC1;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0).addFrameIndex(FI);
+}
+
+void MipsInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc;
+ if (RC == Mips::CPURegsRegisterClass)
+ Opc = Mips::LW;
+ else if (RC == Mips::FGR32RegisterClass)
+ Opc = Mips::LWC1;
+ else {
+ assert(RC == Mips::AFGR64RegisterClass);
+ Opc = Mips::LDC1;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+MachineInstr *MipsInstrInfo::
+foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops, int FI) const
+{
+ if (Ops.size() != 1) return NULL;
+
+ MachineInstr *NewMI = NULL;
+
+ switch (MI->getOpcode()) {
+ case Mips::ADDu:
+ if ((MI->getOperand(0).isReg()) &&
+ (MI->getOperand(1).isReg()) &&
+ (MI->getOperand(1).getReg() == Mips::ZERO) &&
+ (MI->getOperand(2).isReg())) {
+ if (Ops[0] == 0) { // COPY -> STORE
+ unsigned SrcReg = MI->getOperand(2).getReg();
+ bool isKill = MI->getOperand(2).isKill();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::SW))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI);
+ } else { // COPY -> LOAD
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::LW))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addImm(0).addFrameIndex(FI);
+ }
+ }
+ break;
+ case Mips::FMOV_S32:
+ case Mips::FMOV_D32:
+ if ((MI->getOperand(0).isReg()) &&
+ (MI->getOperand(1).isReg())) {
+ const TargetRegisterClass
+ *RC = RI.getRegClass(MI->getOperand(0).getReg());
+ unsigned StoreOpc, LoadOpc;
+
+ if (RC == Mips::FGR32RegisterClass) {
+ LoadOpc = Mips::LWC1; StoreOpc = Mips::SWC1;
+ } else {
+ assert(RC == Mips::AFGR64RegisterClass);
+ LoadOpc = Mips::LDC1; StoreOpc = Mips::SDC1;
+ }
+
+ if (Ops[0] == 0) { // COPY -> STORE
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(StoreOpc))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI) ;
+ } else { // COPY -> LOAD
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(LoadOpc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addImm(0).addFrameIndex(FI);
+ }
+ }
+ break;
+ }
+
+ return NewMI;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+/// GetCondFromBranchOpc - Return the Mips CC that matches
+/// the corresponding branch instruction opcode.
+static Mips::CondCode GetCondFromBranchOpc(unsigned BrOpc)
+{
+ switch (BrOpc) {
+ default: return Mips::COND_INVALID;
+ case Mips::BEQ : return Mips::COND_E;
+ case Mips::BNE : return Mips::COND_NE;
+ case Mips::BGTZ : return Mips::COND_GZ;
+ case Mips::BGEZ : return Mips::COND_GEZ;
+ case Mips::BLTZ : return Mips::COND_LZ;
+ case Mips::BLEZ : return Mips::COND_LEZ;
+
+ // We don't do FP branch analysis yet!
+ case Mips::BC1T :
+ case Mips::BC1F : return Mips::COND_INVALID;
+ }
+}
+
+/// GetCondBranchFromCond - Return the Branch instruction
+/// opcode that matches the cc.
+unsigned Mips::GetCondBranchFromCond(Mips::CondCode CC)
+{
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case Mips::COND_E : return Mips::BEQ;
+ case Mips::COND_NE : return Mips::BNE;
+ case Mips::COND_GZ : return Mips::BGTZ;
+ case Mips::COND_GEZ : return Mips::BGEZ;
+ case Mips::COND_LZ : return Mips::BLTZ;
+ case Mips::COND_LEZ : return Mips::BLEZ;
+
+ case Mips::FCOND_F:
+ case Mips::FCOND_UN:
+ case Mips::FCOND_EQ:
+ case Mips::FCOND_UEQ:
+ case Mips::FCOND_OLT:
+ case Mips::FCOND_ULT:
+ case Mips::FCOND_OLE:
+ case Mips::FCOND_ULE:
+ case Mips::FCOND_SF:
+ case Mips::FCOND_NGLE:
+ case Mips::FCOND_SEQ:
+ case Mips::FCOND_NGL:
+ case Mips::FCOND_LT:
+ case Mips::FCOND_NGE:
+ case Mips::FCOND_LE:
+ case Mips::FCOND_NGT: return Mips::BC1T;
+
+ case Mips::FCOND_T:
+ case Mips::FCOND_OR:
+ case Mips::FCOND_NEQ:
+ case Mips::FCOND_OGL:
+ case Mips::FCOND_UGE:
+ case Mips::FCOND_OGE:
+ case Mips::FCOND_UGT:
+ case Mips::FCOND_OGT:
+ case Mips::FCOND_ST:
+ case Mips::FCOND_GLE:
+ case Mips::FCOND_SNE:
+ case Mips::FCOND_GL:
+ case Mips::FCOND_NLT:
+ case Mips::FCOND_GE:
+ case Mips::FCOND_NLE:
+ case Mips::FCOND_GT: return Mips::BC1F;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified
+/// condition, e.g. turning COND_E to COND_NE.
+Mips::CondCode Mips::GetOppositeBranchCondition(Mips::CondCode CC)
+{
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case Mips::COND_E : return Mips::COND_NE;
+ case Mips::COND_NE : return Mips::COND_E;
+ case Mips::COND_GZ : return Mips::COND_LEZ;
+ case Mips::COND_GEZ : return Mips::COND_LZ;
+ case Mips::COND_LZ : return Mips::COND_GEZ;
+ case Mips::COND_LEZ : return Mips::COND_GZ;
+ case Mips::FCOND_F : return Mips::FCOND_T;
+ case Mips::FCOND_UN : return Mips::FCOND_OR;
+ case Mips::FCOND_EQ : return Mips::FCOND_NEQ;
+ case Mips::FCOND_UEQ: return Mips::FCOND_OGL;
+ case Mips::FCOND_OLT: return Mips::FCOND_UGE;
+ case Mips::FCOND_ULT: return Mips::FCOND_OGE;
+ case Mips::FCOND_OLE: return Mips::FCOND_UGT;
+ case Mips::FCOND_ULE: return Mips::FCOND_OGT;
+ case Mips::FCOND_SF: return Mips::FCOND_ST;
+ case Mips::FCOND_NGLE:return Mips::FCOND_GLE;
+ case Mips::FCOND_SEQ: return Mips::FCOND_SNE;
+ case Mips::FCOND_NGL: return Mips::FCOND_GL;
+ case Mips::FCOND_LT: return Mips::FCOND_NLT;
+ case Mips::FCOND_NGE: return Mips::FCOND_GE;
+ case Mips::FCOND_LE: return Mips::FCOND_NLE;
+ case Mips::FCOND_NGT: return Mips::FCOND_GT;
+ }
+}
+
+bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const
+{
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (!LastInst->getDesc().isBranch())
+ return true;
+
+ // Unconditional branch
+ if (LastOpc == Mips::J) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ Mips::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
+ if (BranchCode == Mips::COND_INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Conditional branch
+ // Block ends with fall-through condbranch.
+ if (BranchCode != Mips::COND_INVALID) {
+ int LastNumOp = LastInst->getNumOperands();
+
+ TBB = LastInst->getOperand(LastNumOp-1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+
+ for (int i=0; i<LastNumOp-1; i++) {
+ Cond.push_back(LastInst->getOperand(i));
+ }
+
+ return false;
+ }
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with Mips::J and a Mips::BNE/Mips::BEQ, handle it.
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ Mips::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
+
+ if (BranchCode != Mips::COND_INVALID && LastOpc == Mips::J) {
+ int SecondNumOp = SecondLastInst->getNumOperands();
+
+ TBB = SecondLastInst->getOperand(SecondNumOp-1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+
+ for (int i=0; i<SecondNumOp-1; i++) {
+ Cond.push_back(SecondLastInst->getOperand(i));
+ }
+
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The last
+ // one is not executed, so remove it.
+ if ((SecondLastOpc == Mips::J) && (LastOpc == Mips::J)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned MipsInstrInfo::
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME this should probably have a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 3 || Cond.size() == 2 || Cond.size() == 0) &&
+ "Mips branch conditions can have two|three components!");
+
+ if (FBB == 0) { // One way branch.
+ if (Cond.empty()) {
+ // Unconditional branch?
+ BuildMI(&MBB, dl, get(Mips::J)).addMBB(TBB);
+ } else {
+ // Conditional branch.
+ unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm());
+ const TargetInstrDesc &TID = get(Opc);
+
+ if (TID.getNumOperands() == 3)
+ BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg())
+ .addReg(Cond[2].getReg())
+ .addMBB(TBB);
+ else
+ BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+
+ }
+ return 1;
+ }
+
+ // Two-way Conditional branch.
+ unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm());
+ const TargetInstrDesc &TID = get(Opc);
+
+ if (TID.getNumOperands() == 3)
+ BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg())
+ .addMBB(TBB);
+ else
+ BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addMBB(TBB);
+
+ BuildMI(&MBB, dl, get(Mips::J)).addMBB(FBB);
+ return 2;
+}
+
+unsigned MipsInstrInfo::
+RemoveBranch(MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != Mips::J &&
+ GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+/// BlockHasNoFallThrough - Return true if the MachineBasicBlock does not
+/// fall through into its successor block.
+bool MipsInstrInfo::
+BlockHasNoFallThrough(const MachineBasicBlock &MBB) const
+{
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case Mips::RET: // Return.
+ case Mips::JR: // Indirect branch.
+ case Mips::J: // Uncond branch.
+ return true;
+ default: return false;
+ }
+}
+
+/// ReverseBranchCondition - Invert the given branch condition in place;
+/// returns false to indicate success.
+bool MipsInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+{
+ assert( (Cond.size() == 3 || Cond.size() == 2) &&
+ "Invalid Mips branch condition!");
+ Cond[0].setImm(GetOppositeBranchCondition((Mips::CondCode)Cond[0].getImm()));
+ return false;
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
new file mode 100644
index 000000000000..334244e6601a
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -0,0 +1,223 @@
+//===- MipsInstrInfo.h - Mips Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSINSTRUCTIONINFO_H
+#define MIPSINSTRUCTIONINFO_H
+
+#include "Mips.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+
+namespace Mips {
+
+ // Mips Branch Codes
+ enum FPBranchCode {
+ BRANCH_F,
+ BRANCH_T,
+ BRANCH_FL,
+ BRANCH_TL,
+ BRANCH_INVALID
+ };
+
+ // Mips Condition Codes
+ enum CondCode {
+ // To be used with float branch True
+ FCOND_F,
+ FCOND_UN,
+ FCOND_EQ,
+ FCOND_UEQ,
+ FCOND_OLT,
+ FCOND_ULT,
+ FCOND_OLE,
+ FCOND_ULE,
+ FCOND_SF,
+ FCOND_NGLE,
+ FCOND_SEQ,
+ FCOND_NGL,
+ FCOND_LT,
+ FCOND_NGE,
+ FCOND_LE,
+ FCOND_NGT,
+
+ // To be used with float branch False
+ // These conditions have the same mnemonics as the
+ // ones above, but are used with branch False.
+ FCOND_T,
+ FCOND_OR,
+ FCOND_NEQ,
+ FCOND_OGL,
+ FCOND_UGE,
+ FCOND_OGE,
+ FCOND_UGT,
+ FCOND_OGT,
+ FCOND_ST,
+ FCOND_GLE,
+ FCOND_SNE,
+ FCOND_GL,
+ FCOND_NLT,
+ FCOND_GE,
+ FCOND_NLE,
+ FCOND_GT,
+
+ // Only integer conditions
+ COND_E,
+ COND_GZ,
+ COND_GEZ,
+ COND_LZ,
+ COND_LEZ,
+ COND_NE,
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ unsigned GetCondBranchFromCond(CondCode CC);
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_E to COND_NE.
+ CondCode GetOppositeBranchCondition(Mips::CondCode CC);
+
+ /// MipsFCCToString - Map each FP condition code to its string
+ inline static const char *MipsFCCToString(Mips::CondCode CC)
+ {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_EQ:
+ case FCOND_NEQ: return "eq";
+ case FCOND_UEQ:
+ case FCOND_OGL: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+ case FCOND_NGE:
+ case FCOND_GE: return "nge";
+ case FCOND_LE:
+ case FCOND_NLE: return "le";
+ case FCOND_NGT:
+ case FCOND_GT: return "ngt";
+ }
+ }
+}
+
+class MipsInstrInfo : public TargetInstrInfoImpl {
+ MipsTargetMachine &TM;
+ const MipsRegisterInfo RI;
+public:
+ explicit MipsInstrInfo(MipsTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MipsRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// Branch Analysis
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// Insert nop instruction when hazard condition is found
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
new file mode 100644
index 000000000000..b9276fe495eb
--- /dev/null
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -0,0 +1,707 @@
+//===- MipsInstrInfo.td - Mips Instruction defs -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Mips profiles and nodes
+//===----------------------------------------------------------------------===//
+
+def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+def SDT_MipsSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<2, 3>, SDTCisInt<1>]>;
+def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>,
+ SDTCisInt<4>]>;
+def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// Call
+def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, [SDNPHasChain,
+ SDNPOutFlag]>;
+
+// Hi and Lo nodes are used to handle global addresses. They are used in
+// MipsISelLowering to lower nodes such as GlobalAddress and ExternalSymbol
+// in the static model (nothing to do with the Mips HI and LO registers).
+def MipsHi : SDNode<"MipsISD::Hi", SDTIntUnaryOp>;
+def MipsLo : SDNode<"MipsISD::Lo", SDTIntUnaryOp>;
+def MipsGPRel : SDNode<"MipsISD::GPRel", SDTIntUnaryOp>;
+
+// Return
+def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain,
+ SDNPOptInFlag]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+// Select Condition Code
+def MipsSelectCC : SDNode<"MipsISD::SelectCC", SDT_MipsSelectCC>;
+
+// Conditional Move
+def MipsCMov : SDNode<"MipsISD::CMov", SDT_MipsCMov>;
+
+//===----------------------------------------------------------------------===//
+// Mips Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">;
+def HasBitCount : Predicate<"Subtarget.hasBitCount()">;
+def HasSwap : Predicate<"Subtarget.hasSwap()">;
+def HasCondMov : Predicate<"Subtarget.hasCondMov()">;
+
+//===----------------------------------------------------------------------===//
+// Mips Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+// Instruction operand types
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+def simm16 : Operand<i32>;
+def shamt : Operand<i32>;
+
+// Unsigned Operand
+def uimm16 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// Address operand
+def mem : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops simm16, CPURegs);
+}
+
+// Transformation Function - get the lower 16 bits.
+def LO16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
+}]>;
+
+// Transformation Function - get the higher 16 bits.
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
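+
+// Note that HI16 simply shifts right by 16 with no carry adjustment (unlike
+// a %hi relocation), so it is intended to be paired with ORi rather than a
+// sign-extending add; see the arbitrary immediate pattern further below.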
+
+// Node immediate fits as a 16-bit sign-extended target immediate.
+// e.g. addi, slti
+def immSExt16 : PatLeaf<(imm), [{
+ if (N->getValueType(0) == MVT::i32)
+ return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
+ else
+ return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
+}]>;
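+
+// For example, -32768..32767 are accepted here, while 0x8000 (32768) is not.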
+
+// Node immediate fits as a 16-bit zero-extended target immediate.
+// The LO16 param means that only the lower 16 bits of the node
+// immediate are used.
+// e.g. andi, ori
+def immZExt16 : PatLeaf<(imm), [{
+ if (N->getValueType(0) == MVT::i32)
+ return (uint32_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+ else
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
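+
+// For example, 0..65535 are accepted here, while -1 (all ones) is not.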
+
+// shamt field must fit in 5 bits.
+def immZExt5 : PatLeaf<(imm), [{
+ return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+}]>;
+
+// Mips Address Mode. The SDNode frameindex can also be matched,
+// since loads and stores to the stack use it.
+def addr : ComplexPattern<i32, 2, "SelectAddr", [frameindex], []>;
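+
+// SelectAddr (presumably defined in MipsISelDAGToDAG) is expected to split an
+// address into a 16-bit signed offset plus a base register (or frame index),
+// matching the "mem" operand defined above.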
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+// Arithmetic 3 register operands
+let isCommutable = 1 in
+class ArithR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode,
+ InstrItinClass itin>:
+ FR< op,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+
+let isCommutable = 1 in
+class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm>:
+ FR< op,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [], IIAlu>;
+
+// Arithmetic 2 register operands
+class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+
+class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [], IIAlu>;
+
+// Arithmetic Multiply ADD/SUB
+let rd=0 in
+class MArithR<bits<6> func, string instr_asm> :
+ FR< 0x1c,
+ func,
+ (outs CPURegs:$rs),
+ (ins CPURegs:$rt),
+ !strconcat(instr_asm, "\t$rs, $rt"),
+ [], IIImul>;
+
+// Logical
+class LogicR<bits<6> func, string instr_asm, SDNode OpNode>:
+ FR< 0x00,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+
+class LogicI<bits<6> op, string instr_asm, SDNode OpNode>:
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, uimm16:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>;
+
+class LogicNOR<bits<6> op, bits<6> func, string instr_asm>:
+ FR< op,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>;
+
+// Shifts
+let rt = 0 in
+class LogicR_shift_imm<bits<6> func, string instr_asm, SDNode OpNode>:
+ FR< 0x00,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, shamt:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu>;
+
+class LogicR_shift_reg<bits<6> func, string instr_asm, SDNode OpNode>:
+ FR< 0x00,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+
+// Load Upper Immediate
+class LoadUpper<bits<6> op, string instr_asm>:
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins uimm16:$imm),
+ !strconcat(instr_asm, "\t$dst, $imm"),
+ [], IIAlu>;
+
+// Memory Load/Store
+let canFoldAsLoad = 1, hasDelaySlot = 1 in
+class LoadM<bits<6> op, string instr_asm, PatFrag OpNode>:
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins mem:$addr),
+ !strconcat(instr_asm, "\t$dst, $addr"),
+ [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
+
+class StoreM<bits<6> op, string instr_asm, PatFrag OpNode>:
+ FI< op,
+ (outs),
+ (ins CPURegs:$dst, mem:$addr),
+ !strconcat(instr_asm, "\t$dst, $addr"),
+ [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
+
+// Conditional Branch
+let isBranch = 1, isTerminator=1, hasDelaySlot = 1 in {
+class CBranch<bits<6> op, string instr_asm, PatFrag cond_op>:
+ FI< op,
+ (outs),
+ (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
+ !strconcat(instr_asm, "\t$a, $b, $offset"),
+ [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
+ IIBranch>;
+
+
+class CBranchZero<bits<6> op, string instr_asm, PatFrag cond_op>:
+ FI< op,
+ (outs),
+ (ins CPURegs:$src, brtarget:$offset),
+ !strconcat(instr_asm, "\t$src, $offset"),
+ [(brcond (cond_op CPURegs:$src, 0), bb:$offset)],
+ IIBranch>;
+}
+
+// SetCC
+class SetCC_R<bits<6> op, bits<6> func, string instr_asm,
+ PatFrag cond_op>:
+ FR< op,
+ func,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))],
+ IIAlu>;
+
+class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op,
+ Operand Od, PatLeaf imm_type>:
+ FI< op,
+ (outs CPURegs:$dst),
+ (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, "\t$dst, $b, $c"),
+ [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))],
+ IIAlu>;
+
+// Unconditional branch
+let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
+class JumpFJ<bits<6> op, string instr_asm>:
+ FJ< op,
+ (outs),
+ (ins brtarget:$target),
+ !strconcat(instr_asm, "\t$target"),
+ [(br bb:$target)], IIBranch>;
+
+let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in
+class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
+ FR< op,
+ func,
+ (outs),
+ (ins CPURegs:$target),
+ !strconcat(instr_asm, "\t$target"),
+ [(brind CPURegs:$target)], IIBranch>;
+
+// Jump and Link (Call)
+let isCall=1, hasDelaySlot=1,
+ // All calls clobber the non-callee saved registers...
+ Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ K0, K1, F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13,
+ F14, F15, F16, F17, F18, F19], Uses = [GP] in {
+ class JumpLink<bits<6> op, string instr_asm>:
+ FJ< op,
+ (outs),
+ (ins calltarget:$target),
+ !strconcat(instr_asm, "\t$target"),
+ [(MipsJmpLink imm:$target)], IIBranch>;
+
+ let rd=31 in
+ class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>:
+ FR< op,
+ func,
+ (outs),
+ (ins CPURegs:$rs),
+ !strconcat(instr_asm, "\t$rs"),
+ [(MipsJmpLink CPURegs:$rs)], IIBranch>;
+
+ class BranchLink<string instr_asm>:
+ FI< 0x1,
+ (outs),
+ (ins CPURegs:$rs, brtarget:$target),
+ !strconcat(instr_asm, "\t$rs, $target"),
+ [], IIBranch>;
+}
+
+// Mul, Div
+class MulDiv<bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR< 0x00,
+ func,
+ (outs),
+ (ins CPURegs:$a, CPURegs:$b),
+ !strconcat(instr_asm, "\t$a, $b"),
+ [], itin>;
+
+// Move from Hi/Lo
+class MoveFromLOHI<bits<6> func, string instr_asm>:
+ FR< 0x00,
+ func,
+ (outs CPURegs:$dst),
+ (ins),
+ !strconcat(instr_asm, "\t$dst"),
+ [], IIHiLo>;
+
+class MoveToLOHI<bits<6> func, string instr_asm>:
+ FR< 0x00,
+ func,
+ (outs),
+ (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$src"),
+ [], IIHiLo>;
+
+class EffectiveAddress<string instr_asm> :
+ FI<0x09,
+ (outs CPURegs:$dst),
+ (ins mem:$addr),
+ instr_asm,
+ [(set CPURegs:$dst, addr:$addr)], IIAlu>;
+
+// Count Leading Ones/Zeros in Word
+class CountLeading<bits<6> func, string instr_asm, SDNode CountOp>:
+ FR< 0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"),
+ [(set CPURegs:$dst, (CountOp CPURegs:$src))], IIAlu>;
+
+// Sign Extend in Register.
+class SignExtInReg<bits<6> func, string instr_asm, ValueType vt>:
+ FR< 0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"),
+ [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>;
+
+// Byte Swap
+class ByteSwap<bits<6> func, string instr_asm>:
+ FR< 0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src),
+ !strconcat(instr_asm, "\t$dst, $src"),
+ [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>;
+
+// Conditional Move
+class CondMov<bits<6> func, string instr_asm, PatLeaf MovCode>:
+ FR< 0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T,
+ CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"),
+ [(set CPURegs:$dst, (MipsCMov CPURegs:$F, CPURegs:$T,
+ CPURegs:$cond, MovCode))], NoItinerary>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+// As stack adjustment is always done with addiu, we need a 16-bit immediate
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
+ "!ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+// Some assembly macros need to avoid pseudo-instructions and the assembler's
+// automatic reordering; in those cases we must reorder ourselves.
+def MACRO : MipsPseudo<(outs), (ins), ".set\tmacro", []>;
+def REORDER : MipsPseudo<(outs), (ins), ".set\treorder", []>;
+def NOMACRO : MipsPseudo<(outs), (ins), ".set\tnomacro", []>;
+def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
+
+// When handling PIC code the assembler needs .cpload and .cprestore
+// directives. If the real instructions corresponding to these directives
+// are used, we get the same behavior, but also a bunch of warnings
+// from the assembler.
+def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
+def CPRESTORE : MipsPseudo<(outs), (ins uimm16:$loc), ".cprestore\t$loc\n", []>;
+
+// The supported Mips ISAs don't have any instruction close to the SELECT_CC
+// operation. The solution is to create a Mips pseudo SELECT_CC instruction
+// (MipsSelectCC), use LowerSELECT_CC to generate this instruction, and finally
+// replace it with real supported nodes in EmitInstrWithCustomInserter.
+let usesCustomDAGSchedInserter = 1 in {
+ class PseudoSelCC<RegisterClass RC, string asmstr>:
+ MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr,
+ [(set RC:$dst, (MipsSelectCC CPURegs:$CmpRes, RC:$T, RC:$F))]>;
+}
+
+def Select_CC : PseudoSelCC<CPURegs, "# MipsSelect_CC_i32">;
+
+//===----------------------------------------------------------------------===//
+// Instruction definition
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsI Instructions
+//===----------------------------------------------------------------------===//
+
+/// Arithmetic Instructions (ALU Immediate)
+def ADDiu : ArithI<0x09, "addiu", add, simm16, immSExt16>;
+def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16>;
+def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16>;
+def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16>;
+def ANDi : LogicI<0x0c, "andi", and>;
+def ORi : LogicI<0x0d, "ori", or>;
+def XORi : LogicI<0x0e, "xori", xor>;
+def LUi : LoadUpper<0x0f, "lui">;
+
+/// Arithmetic Instructions (3-Operand, R-Type)
+def ADDu : ArithR<0x00, 0x21, "addu", add, IIAlu>;
+def SUBu : ArithR<0x00, 0x23, "subu", sub, IIAlu>;
+def ADD : ArithOverflowR<0x00, 0x20, "add">;
+def SUB : ArithOverflowR<0x00, 0x22, "sub">;
+def SLT : SetCC_R<0x00, 0x2a, "slt", setlt>;
+def SLTu : SetCC_R<0x00, 0x2b, "sltu", setult>;
+def AND : LogicR<0x24, "and", and>;
+def OR : LogicR<0x25, "or", or>;
+def XOR : LogicR<0x26, "xor", xor>;
+def NOR : LogicNOR<0x00, 0x27, "nor">;
+
+/// Shift Instructions
+def SLL : LogicR_shift_imm<0x00, "sll", shl>;
+def SRL : LogicR_shift_imm<0x02, "srl", srl>;
+def SRA : LogicR_shift_imm<0x03, "sra", sra>;
+def SLLV : LogicR_shift_reg<0x04, "sllv", shl>;
+def SRLV : LogicR_shift_reg<0x06, "srlv", srl>;
+def SRAV : LogicR_shift_reg<0x07, "srav", sra>;
+
+/// Load and Store Instructions
+def LB : LoadM<0x20, "lb", sextloadi8>;
+def LBu : LoadM<0x24, "lbu", zextloadi8>;
+def LH : LoadM<0x21, "lh", sextloadi16>;
+def LHu : LoadM<0x25, "lhu", zextloadi16>;
+def LW : LoadM<0x23, "lw", load>;
+def SB : StoreM<0x28, "sb", truncstorei8>;
+def SH : StoreM<0x29, "sh", truncstorei16>;
+def SW : StoreM<0x2b, "sw", store>;
+
+/// Jump and Branch Instructions
+def J : JumpFJ<0x02, "j">;
+def JR : JumpFR<0x00, 0x08, "jr">;
+def JAL : JumpLink<0x03, "jal">;
+def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
+def BEQ : CBranch<0x04, "beq", seteq>;
+def BNE : CBranch<0x05, "bne", setne>;
+
+let rt=1 in
+ def BGEZ : CBranchZero<0x01, "bgez", setge>;
+
+let rt=0 in {
+ def BGTZ : CBranchZero<0x07, "bgtz", setgt>;
+ def BLEZ : CBranchZero<0x06, "blez", setle>;
+ def BLTZ : CBranchZero<0x01, "bltz", setlt>;
+}
+
+def BGEZAL : BranchLink<"bgezal">;
+def BLTZAL : BranchLink<"bltzal">;
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1,
+ isBarrier=1, hasCtrlDep=1, rs=0, rt=0, shamt=0 in
+ def RET : FR <0x00, 0x02, (outs), (ins CPURegs:$target),
+ "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>;
+
+/// Multiply and Divide Instructions.
+let Defs = [HI, LO] in {
+ def MULT : MulDiv<0x18, "mult", IIImul>;
+ def MULTu : MulDiv<0x19, "multu", IIImul>;
+ def DIV : MulDiv<0x1a, "div", IIIdiv>;
+ def DIVu : MulDiv<0x1b, "divu", IIIdiv>;
+}
+
+let Defs = [HI] in
+ def MTHI : MoveToLOHI<0x11, "mthi">;
+let Defs = [LO] in
+ def MTLO : MoveToLOHI<0x13, "mtlo">;
+
+let Uses = [HI] in
+ def MFHI : MoveFromLOHI<0x10, "mfhi">;
+let Uses = [LO] in
+ def MFLO : MoveFromLOHI<0x12, "mflo">;
+
+/// Sign Ext In Register Instructions.
+let Predicates = [HasSEInReg] in {
+ let shamt = 0x10, rs = 0 in
+ def SEB : SignExtInReg<0x21, "seb", i8>;
+
+ let shamt = 0x18, rs = 0 in
+ def SEH : SignExtInReg<0x20, "seh", i16>;
+}
+
+/// Count Leading
+let Predicates = [HasBitCount] in {
+ let rt = 0 in
+ def CLZ : CountLeading<0b010110, "clz", ctlz>;
+}
+
+/// Byte Swap
+let Predicates = [HasSwap] in {
+ let shamt = 0x3, rs = 0 in
+ def WSBW : ByteSwap<0x20, "wsbw">;
+}
+
+/// Conditional Move
+def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>;
+def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>;
+
+let Predicates = [HasCondMov], isTwoAddress = 1 in {
+ def MOVN : CondMov<0x0b, "movn", MIPS_CMOV_NZERO>;
+ def MOVZ : CondMov<0x0a, "movz", MIPS_CMOV_ZERO>;
+}
+
+/// No operation
+let addr=0 in
+ def NOP : FJ<0, (outs), (ins), "nop", [], IIAlu>;
+
+// FrameIndexes are legalized when they are operands of load/store
+// instructions. The same does not happen for stack address copies, so an
+// add op with a mem ComplexPattern is used so that the stack address copy
+// can be matched. It's similar to Sparc's LEA_ADDRi.
+def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, ${addr:stackloc}">;
+
+// MADD*/MSUB* are not part of MipsI either.
+//def MADD : MArithR<0x00, "madd">;
+//def MADDU : MArithR<0x01, "maddu">;
+//def MSUB : MArithR<0x04, "msub">;
+//def MSUBU : MArithR<0x05, "msubu">;
+
+// MUL is an assembly macro in the currently used ISAs. In recent ISAs
+// it is a real instruction.
+//def MUL : ArithR<0x1c, 0x02, "mul", mul, IIImul>;
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i32 immSExt16:$in),
+ (ADDiu ZERO, imm:$in)>;
+def : Pat<(i32 immZExt16:$in),
+ (ORi ZERO, imm:$in)>;
+
+// Arbitrary immediates
+def : Pat<(i32 imm:$imm),
+ (ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>;
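+
+// e.g. 0x12345678 is materialized as "lui $r, 0x1234" followed by
+// "ori $r, $r, 0x5678".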
+
+// Carry patterns
+def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs),
+ (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
+def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs),
+ (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
+def : Pat<(addc CPURegs:$src, imm:$imm),
+ (ADDiu CPURegs:$src, imm:$imm)>;
+
+// Call
+def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
+ (JAL tglobaladdr:$dst)>;
+def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (JAL texternalsym:$dst)>;
+def : Pat<(MipsJmpLink CPURegs:$dst),
+ (JALR CPURegs:$dst)>;
+
+// hi/lo relocs
+def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
+
+def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
+ (ADDiu CPURegs:$hi, tjumptable:$lo)>;
+
+def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
+ (ADDiu CPURegs:$hi, tconstpool:$lo)>;
+
+// gp_rel relocs
+def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
+ (ADDiu CPURegs:$gp, tglobaladdr:$in)>;
+def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
+ (ADDiu CPURegs:$gp, tconstpool:$in)>;
+
+// Mips does not have a "not" instruction, so we expand it with a nor.
+def : Pat<(not CPURegs:$in),
+ (NOR CPURegs:$in, ZERO)>;
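+
+// "nor $d, $s, $zero" computes ~($s | 0), i.e. the bitwise not of $s.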
+
+// extended load and stores
+def : Pat<(extloadi1 addr:$src), (LBu addr:$src)>;
+def : Pat<(extloadi8 addr:$src), (LBu addr:$src)>;
+def : Pat<(extloadi16 addr:$src), (LHu addr:$src)>;
+
+// peepholes
+def : Pat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
+
+// brcond patterns
+def : Pat<(brcond (setne CPURegs:$lhs, 0), bb:$dst),
+ (BNE CPURegs:$lhs, ZERO, bb:$dst)>;
+def : Pat<(brcond (seteq CPURegs:$lhs, 0), bb:$dst),
+ (BEQ CPURegs:$lhs, ZERO, bb:$dst)>;
+
+def : Pat<(brcond (setge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLT CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setuge CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLTu CPURegs:$lhs, CPURegs:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
+ (BEQ (SLTi CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setuge CPURegs:$lhs, immSExt16:$rhs), bb:$dst),
+ (BEQ (SLTiu CPURegs:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+
+def : Pat<(brcond (setle CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLT CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+def : Pat<(brcond (setule CPURegs:$lhs, CPURegs:$rhs), bb:$dst),
+ (BEQ (SLTu CPURegs:$rhs, CPURegs:$lhs), ZERO, bb:$dst)>;
+
+def : Pat<(brcond CPURegs:$cond, bb:$dst),
+ (BNE CPURegs:$cond, ZERO, bb:$dst)>;
+
+// select patterns
+def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (SLT CPURegs:$lhs, CPURegs:$rhs))>;
+def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs))>;
+def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs))>;
+def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (SLTiu CPURegs:$lh, immSExt16:$rh))>;
+
+def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (SLT CPURegs:$rhs, CPURegs:$lhs))>;
+def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs))>;
+
+def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVZ CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
+def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
+ (MOVN CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
+
+def : Pat<(select CPURegs:$cond, CPURegs:$T, CPURegs:$F),
+ (MOVN CPURegs:$F, CPURegs:$T, CPURegs:$cond)>;
+
+// setcc patterns
+def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs),
+ (SLTu (XOR CPURegs:$lhs, CPURegs:$rhs), 1)>;
+def : Pat<(setne CPURegs:$lhs, CPURegs:$rhs),
+ (SLTu ZERO, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
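+
+// In the two patterns above, "xor a, b" is zero iff a == b; "sltu x, 1" then
+// yields 1 exactly when x is zero, and "sltu $zero, x" yields 1 when x is
+// nonzero.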
+
+def : Pat<(setle CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLT CPURegs:$rhs, CPURegs:$lhs), 1)>;
+def : Pat<(setule CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLTu CPURegs:$rhs, CPURegs:$lhs), 1)>;
+
+def : Pat<(setgt CPURegs:$lhs, CPURegs:$rhs),
+ (SLT CPURegs:$rhs, CPURegs:$lhs)>;
+def : Pat<(setugt CPURegs:$lhs, CPURegs:$rhs),
+ (SLTu CPURegs:$rhs, CPURegs:$lhs)>;
+
+def : Pat<(setge CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLT CPURegs:$lhs, CPURegs:$rhs), 1)>;
+def : Pat<(setuge CPURegs:$lhs, CPURegs:$rhs),
+ (XORi (SLTu CPURegs:$lhs, CPURegs:$rhs), 1)>;
+
+def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs),
+ (XORi (SLTi CPURegs:$lhs, immSExt16:$rhs), 1)>;
+def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs),
+ (XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//===----------------------------------------------------------------------===//
+
+include "MipsInstrFPU.td"
+
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
new file mode 100644
index 000000000000..b95394ec81ce
--- /dev/null
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -0,0 +1,131 @@
+//===-- MipsMachineFunction.h - Private data used for Mips --------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS_MACHINE_FUNCTION_INFO_H
+#define MIPS_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+namespace llvm {
+
+/// MipsFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private Mips target-specific information for each MachineFunction.
+class MipsFunctionInfo : public MachineFunctionInfo {
+
+private:
+ /// Holds, for each function, the stack offset at which the Frame Pointer
+ /// must be saved. Used in the prologue and epilogue to emit FP save/restore.
+ int FPStackOffset;
+
+ /// Holds, for each function, the stack offset at which the Return Address
+ /// must be saved. Used in the prologue and epilogue to emit RA save/restore.
+ int RAStackOffset;
+
+ /// At each function entry two special bitmask directives must be emitted,
+ /// one for the CPU and one for the FPU callee-saved registers, to help
+ /// debugging. Both need the negative offset from the final stack size and
+ /// the stack location of the highest saved register.
+ int CPUTopSavedRegOff;
+ int FPUTopSavedRegOff;
+
+ /// MipsFIHolder - Holds a FrameIndex and its Stack Pointer Offset.
+ struct MipsFIHolder {
+
+ int FI;
+ int SPOffset;
+
+ MipsFIHolder(int FrameIndex, int StackPointerOffset)
+ : FI(FrameIndex), SPOffset(StackPointerOffset) {}
+ };
+
+ /// When PIC is used the GP must be saved on the stack in the function
+ /// prologue and must be reloaded from this stack location after every
+ /// call. A reference to its stack location and frame index must be kept
+ /// to be used in emitPrologue and processFunctionBeforeFrameFinalized.
+ MipsFIHolder GPHolder;
+
+ /// In LowerFORMAL_ARGUMENTS the stack size is unknown, so the Stack
+ /// Pointer Offset calculation for arguments that are not passed in
+ /// registers must be postponed to emitPrologue.
+ SmallVector<MipsFIHolder, 16> FnLoadArgs;
+ bool HasLoadArgs;
+
+ // For vararg functions we must write the argument registers back to the
+ // caller's stack, preserving the register arguments. Since the stack size
+ // is unknown in LowerFORMAL_ARGUMENTS, the Stack Pointer Offset calculation
+ // must be postponed to emitPrologue.
+ SmallVector<MipsFIHolder, 4> FnStoreVarArgs;
+ bool HasStoreVarArgs;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+public:
+ MipsFunctionInfo(MachineFunction& MF)
+ : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0),
+ FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false),
+ HasStoreVarArgs(false), SRetReturnReg(0)
+ {}
+
+ int getFPStackOffset() const { return FPStackOffset; }
+ void setFPStackOffset(int Off) { FPStackOffset = Off; }
+
+ int getRAStackOffset() const { return RAStackOffset; }
+ void setRAStackOffset(int Off) { RAStackOffset = Off; }
+
+ int getCPUTopSavedRegOff() const { return CPUTopSavedRegOff; }
+ void setCPUTopSavedRegOff(int Off) { CPUTopSavedRegOff = Off; }
+
+ int getFPUTopSavedRegOff() const { return FPUTopSavedRegOff; }
+ void setFPUTopSavedRegOff(int Off) { FPUTopSavedRegOff = Off; }
+
+ int getGPStackOffset() const { return GPHolder.SPOffset; }
+ int getGPFI() const { return GPHolder.FI; }
+ void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
+ void setGPFI(int FI) { GPHolder.FI = FI; }
+
+ bool hasLoadArgs() const { return HasLoadArgs; }
+ bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+
+ void recordLoadArgsFI(int FI, int SPOffset) {
+ if (!HasLoadArgs) HasLoadArgs=true;
+ FnLoadArgs.push_back(MipsFIHolder(FI, SPOffset));
+ }
+ void recordStoreVarArgsFI(int FI, int SPOffset) {
+ if (!HasStoreVarArgs) HasStoreVarArgs=true;
+ FnStoreVarArgs.push_back(MipsFIHolder(FI, SPOffset));
+ }
+
+ void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasLoadArgs()) return;
+ for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
+ MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset );
+ }
+ void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasStoreVarArgs()) return;
+ for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
+ MFI->setObjectOffset( FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset );
+ }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+};
+
+} // end of namespace llvm
+
+#endif // MIPS_MACHINE_FUNCTION_INFO_H
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
new file mode 100644
index 000000000000..579d4db6422f
--- /dev/null
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -0,0 +1,535 @@
+//===- MipsRegisterInfo.cpp - MIPS Register Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-reg-info"
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsRegisterInfo.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
+ const TargetInstrInfo &tii)
+ : MipsGenRegisterInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
+ Subtarget(ST), TII(tii) {}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// Mips::RA, return the number that it corresponds to (e.g. 31).
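+/// For example, getRegisterNumbering(Mips::T9) returns 25 and
+/// getRegisterNumbering(Mips::F31) returns 31.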
+unsigned MipsRegisterInfo::
+getRegisterNumbering(unsigned RegEnum)
+{
+ switch (RegEnum) {
+ case Mips::ZERO : case Mips::F0 : case Mips::D0 : return 0;
+ case Mips::AT : case Mips::F1 : return 1;
+ case Mips::V0 : case Mips::F2 : case Mips::D1 : return 2;
+ case Mips::V1 : case Mips::F3 : return 3;
+ case Mips::A0 : case Mips::F4 : case Mips::D2 : return 4;
+ case Mips::A1 : case Mips::F5 : return 5;
+ case Mips::A2 : case Mips::F6 : case Mips::D3 : return 6;
+ case Mips::A3 : case Mips::F7 : return 7;
+ case Mips::T0 : case Mips::F8 : case Mips::D4 : return 8;
+ case Mips::T1 : case Mips::F9 : return 9;
+ case Mips::T2 : case Mips::F10: case Mips::D5: return 10;
+ case Mips::T3 : case Mips::F11: return 11;
+ case Mips::T4 : case Mips::F12: case Mips::D6: return 12;
+ case Mips::T5 : case Mips::F13: return 13;
+ case Mips::T6 : case Mips::F14: case Mips::D7: return 14;
+ case Mips::T7 : case Mips::F15: return 15;
+ case Mips::T8 : case Mips::F16: case Mips::D8: return 16;
+ case Mips::T9 : case Mips::F17: return 17;
+ case Mips::S0 : case Mips::F18: case Mips::D9: return 18;
+ case Mips::S1 : case Mips::F19: return 19;
+ case Mips::S2 : case Mips::F20: case Mips::D10: return 20;
+ case Mips::S3 : case Mips::F21: return 21;
+ case Mips::S4 : case Mips::F22: case Mips::D11: return 22;
+ case Mips::S5 : case Mips::F23: return 23;
+ case Mips::S6 : case Mips::F24: case Mips::D12: return 24;
+ case Mips::S7 : case Mips::F25: return 25;
+ case Mips::K0 : case Mips::F26: case Mips::D13: return 26;
+ case Mips::K1 : case Mips::F27: return 27;
+ case Mips::GP : case Mips::F28: case Mips::D14: return 28;
+ case Mips::SP : case Mips::F29: return 29;
+ case Mips::FP : case Mips::F30: case Mips::D15: return 30;
+ case Mips::RA : case Mips::F31: return 31;
+ default: assert(0 && "Unknown register number!");
+ }
+ return 0; // Not reached
+}
+
+unsigned MipsRegisterInfo::getPICCallReg(void) { return Mips::T9; }
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// Mips Callee Saved Registers
+const unsigned* MipsRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const
+{
+ // Mips callee-save register range is $16-$23, $f20-$f30
+ static const unsigned SingleFloatOnlyCalleeSavedRegs[] = {
+ Mips::S0, Mips::S1, Mips::S2, Mips::S3,
+ Mips::S4, Mips::S5, Mips::S6, Mips::S7,
+ Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24, Mips::F25,
+ Mips::F26, Mips::F27, Mips::F28, Mips::F29, Mips::F30, 0
+ };
+
+ static const unsigned BitMode32CalleeSavedRegs[] = {
+ Mips::S0, Mips::S1, Mips::S2, Mips::S3,
+ Mips::S4, Mips::S5, Mips::S6, Mips::S7,
+ Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30,
+ Mips::D10, Mips::D11, Mips::D12, Mips::D13, Mips::D14, Mips::D15,0
+ };
+
+ if (Subtarget.isSingleFloat())
+ return SingleFloatOnlyCalleeSavedRegs;
+ else
+ return BitMode32CalleeSavedRegs;
+}
+
+/// Mips Callee Saved Register Classes
+const TargetRegisterClass* const*
+MipsRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
+{
+ static const TargetRegisterClass * const SingleFloatOnlyCalleeSavedRC[] = {
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0
+ };
+
+ static const TargetRegisterClass * const BitMode32CalleeSavedRC[] = {
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+ &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
+ &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass,
+ &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, 0
+ };
+
+ if (Subtarget.isSingleFloat())
+ return SingleFloatOnlyCalleeSavedRC;
+ else
+ return BitMode32CalleeSavedRC;
+}
+
+BitVector MipsRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const
+{
+ BitVector Reserved(getNumRegs());
+ Reserved.set(Mips::ZERO);
+ Reserved.set(Mips::AT);
+ Reserved.set(Mips::K0);
+ Reserved.set(Mips::K1);
+ Reserved.set(Mips::GP);
+ Reserved.set(Mips::SP);
+ Reserved.set(Mips::FP);
+ Reserved.set(Mips::RA);
+
+ // SVR4 ABI: the odd floating point registers can't be used, so reserve them.
+ if (!Subtarget.isSingleFloat())
+ for (unsigned FReg=(Mips::F0)+1; FReg < Mips::F30; FReg+=2)
+ Reserved.set(FReg);
+
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated by decrementing the stack pointer on
+// the first instruction of a function prologue. Once decremented,
+// all stack references are done through a positive offset
+// from the stack/frame pointer, so the stack is considered
+// to grow up! Otherwise terrible hacks would have to be made
+// to keep this stack ABI compliant :)
+//
+// The stack frame required by the ABI (after call):
+// Offset
+//
+// 0 ----------
+// 4 Args to pass
+// . saved $GP (used in PIC)
+// . Alloca allocations
+// . Local Area
+// . CPU "Callee Saved" Registers
+// . saved FP
+// . saved RA
+// . FPU "Callee Saved" Registers
+// StackSize -----------
+//
+// Offset - offset from sp after stack allocation on the function prologue
+//
+// The sp is the stack pointer decremented/incremented by the stack size
+// at the Prologue/Epilogue
+//
+// References to the previous stack (to obtain arguments) are done
+// with offsets that exceed the stack size: (stacksize + (4 * (num_arg - 1)))
+//
+// Examples:
+// - reference to the actual stack frame
+// for any local area variable there is something like: FI >= 0, StackOffset: 4
+// sw REGX, 4(SP)
+//
+// - reference to previous stack frame
+// suppose there's a load of the 5th argument: FI < 0, StackOffset: 16.
+// The emitted instruction will be something like:
+// lw REGX, 16+StackSize(SP)
+//
+// Since the total stack size is unknown on LowerFORMAL_ARGUMENTS, all
+// stack references (ObjectOffset) created to reference the function
+// arguments are negative numbers. This way, in eliminateFrameIndex it is
+// possible to detect those references, and the offsets are adjusted to
+// their real location.
+//
+//===----------------------------------------------------------------------===//
+
+void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
+{
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+
+ // Min and Max CSI FrameIndex.
+ int MinCSFI = -1, MaxCSFI = -1;
+
+ // See the description at MipsMachineFunction.h
+ int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1;
+
+ // Replace the dummy '0' SPOffset with the negative offsets, as explained
+ // in LowerFORMAL_ARGUMENTS. Leaving '0' for a while is necessary to keep
+ // calculateFrameObjectOffsets from laying these objects out on its own.
+ MipsFI->adjustLoadArgsFI(MFI);
+ MipsFI->adjustStoreVarArgsFI(MFI);
+
+ // It happens that the default stack frame allocation order does not
+ // directly map to the convention used for MIPS, so we must fix it here.
+ // We move the callee saved register slots after the local variables area,
+ // as described in the stack frame layout above.
+ unsigned CalleeSavedAreaSize = 0;
+ if (!CSI.empty()) {
+ MinCSFI = CSI[0].getFrameIdx();
+ MaxCSFI = CSI[CSI.size()-1].getFrameIdx();
+ }
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+
+ // Adjust local variables. They should come on the stack right
+ // after the arguments.
+ int LastOffsetFI = -1;
+ for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (i >= MinCSFI && i <= MaxCSFI)
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ unsigned Offset = MFI->getObjectOffset(i) - CalleeSavedAreaSize;
+ if (LastOffsetFI == -1)
+ LastOffsetFI = i;
+ if (Offset > MFI->getObjectOffset(LastOffsetFI))
+ LastOffsetFI = i;
+ MFI->setObjectOffset(i, Offset);
+ }
+
+ // Adjust the CPU Callee Saved Registers Area. Registers RA and FP must
+ // be saved in this CPU Area when they are needed. This whole Area must
+ // be aligned to the default Stack Alignment requirements.
+ unsigned StackOffset = 0;
+ unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8;
+
+ if (LastOffsetFI >= 0)
+ StackOffset = MFI->getObjectOffset(LastOffsetFI)+
+ MFI->getObjectSize(LastOffsetFI);
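+ // Round StackOffset up to the next multiple of StackAlign (the usual
+ // (x + align - 1) / align * align idiom) so the callee saved area starts
+ // at an aligned offset.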
+ StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+ for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
+ if (CSI[i].getRegClass() != Mips::CPURegsRegisterClass)
+ break;
+ MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
+ TopCPUSavedRegOff = StackOffset;
+ StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+ }
+
+ if (hasFP(MF)) {
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize),
+ StackOffset);
+ MipsFI->setFPStackOffset(StackOffset);
+ TopCPUSavedRegOff = StackOffset;
+ StackOffset += RegSize;
+ }
+
+ if (MFI->hasCalls()) {
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize),
+ StackOffset);
+ MipsFI->setRAStackOffset(StackOffset);
+ TopCPUSavedRegOff = StackOffset;
+ StackOffset += RegSize;
+ }
+ StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+ // Adjust FPU Callee Saved Registers Area. This Area must be
+ // aligned to the default Stack Alignment requirements.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
+ continue;
+ MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
+ TopFPUSavedRegOff = StackOffset;
+ StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+ }
+ StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
+
+ // Update frame info
+ MFI->setStackSize(StackOffset);
+
+ // Recalculate the final top offsets. The final values must be '0'
+ // if there isn't a callee saved register for CPU or FPU, otherwise
+ // a negative offset is needed.
+ if (TopCPUSavedRegOff >= 0)
+ MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);
+
+ if (TopFPUSavedRegOff >= 0)
+ MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset);
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MipsRegisterInfo::
+hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+// This function eliminates ADJCALLSTACKDOWN and
+// ADJCALLSTACKUP pseudo instructions.
+void MipsRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+// FrameIndex represents objects inside an abstract stack.
+// We must replace FrameIndex with a direct stack/frame
+// pointer reference.
+void MipsRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const
+{
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
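+ // This code assumes that the offset immediate is the operand right before
+ // the frame index, so operand i-1 is rewritten together with operand i at
+ // the end of this function.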
+
+ #ifndef NDEBUG
+ DOUT << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ DOUT << "<--------->\n";
+ MI.print(DOUT);
+ #endif
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int stackSize = MF.getFrameInfo()->getStackSize();
+ int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ #ifndef NDEBUG
+ DOUT << "FrameIndex : " << FrameIndex << "\n";
+ DOUT << "spOffset : " << spOffset << "\n";
+ DOUT << "stackSize : " << stackSize << "\n";
+ #endif
+
+ // As explained in LowerFORMAL_ARGUMENTS, detect negative offsets
+ // and adjust SPOffsets taking the final stack size into account.
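+ // For example, with a recorded spOffset of -20 the expression below yields
+ // Offset = stackSize + (-(-20 + 4)) = stackSize + 16, matching the
+ // "16+StackSize" style reference shown in the layout comment above.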
+ int Offset = ((spOffset < 0) ? (stackSize + (-(spOffset+4))) : (spOffset));
+ Offset += MI.getOperand(i-1).getImm();
+
+ #ifndef NDEBUG
+ DOUT << "Offset : " << Offset << "\n";
+ DOUT << "<--------->\n";
+ #endif
+
+ MI.getOperand(i-1).ChangeToImmediate(Offset);
+ MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+}
+
+void MipsRegisterInfo::
+emitPrologue(MachineFunction &MF) const
+{
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+ bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
+
+ // Get the right frame order for Mips.
+ adjustMipsStackFrame(MF);
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->hasCalls()) return;
+
+ int FPOffset = MipsFI->getFPStackOffset();
+ int RAOffset = MipsFI->getRAStackOffset();
+
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
+
+ // TODO: check whether GP is really needed here.
+ if (isPIC && Subtarget.isABI_O32())
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD)).addReg(getPICCallReg());
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
+
+ // Adjust stack : addi sp, sp, (-imm)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(-StackSize);
+
+ // Save the return address only if the function isn't a leaf one.
+ // sw $ra, stack_loc($sp)
+ if (MFI->hasCalls()) {
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
+ .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
+ }
+
+ // If the frame pointer is enabled, save it and set it
+ // to point to the stack pointer.
+ if (hasFP(MF)) {
+ // sw $fp,stack_loc($sp)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
+ .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP);
+
+ // move $fp, $sp
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
+ .addReg(Mips::SP).addReg(Mips::ZERO);
+ }
+
+ // PIC specific function prologue
+ if ((isPIC) && (MFI->hasCalls())) {
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE))
+ .addImm(MipsFI->getGPStackOffset());
+ }
+}
+
+void MipsRegisterInfo::
+emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ // Get the number of bytes from FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ // Get the FI's where RA and FP are saved.
+ int FPOffset = MipsFI->getFPStackOffset();
+ int RAOffset = MipsFI->getRAStackOffset();
+
+ // If the frame pointer is enabled, restore it and restore the
+ // stack pointer.
+ if (hasFP(MF)) {
+ // move $sp, $fp
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::SP)
+ .addReg(Mips::FP).addReg(Mips::ZERO);
+
+ // lw $fp,stack_loc($sp)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::FP)
+ .addImm(FPOffset).addReg(Mips::SP);
+ }
+
+ // Restore the return address only if the function isn't a leaf one.
+ // lw $ra, stack_loc($sp)
+ if (MFI->hasCalls()) {
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA)
+ .addImm(RAOffset).addReg(Mips::SP);
+ }
+
+ // adjust stack : insert addi sp, sp, (imm)
+ if (NumBytes) {
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(NumBytes);
+ }
+}
+
+
+void MipsRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+ // Set the SPOffset on the FI where GP must be saved/loaded.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
+ if (MFI->hasCalls() && isPIC) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ MFI->setObjectOffset(MipsFI->getGPFI(), MipsFI->getGPStackOffset());
+ }
+}
+
+unsigned MipsRegisterInfo::
+getRARegister() const {
+ return Mips::RA;
+}
+
+unsigned MipsRegisterInfo::
+getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? Mips::FP : Mips::SP;
+}
+
+unsigned MipsRegisterInfo::
+getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned MipsRegisterInfo::
+getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int MipsRegisterInfo::
+getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "What is the dwarf register number");
+ return -1;
+}
+
+#include "MipsGenRegisterInfo.inc"
+
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
new file mode 100644
index 000000000000..808e995b4ed3
--- /dev/null
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -0,0 +1,78 @@
+//===- MipsRegisterInfo.h - Mips Register Information Impl ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSREGISTERINFO_H
+#define MIPSREGISTERINFO_H
+
+#include "Mips.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "MipsGenRegisterInfo.h.inc"
+
+namespace llvm {
+class MipsSubtarget;
+class TargetInstrInfo;
+class Type;
+
+struct MipsRegisterInfo : public MipsGenRegisterInfo {
+ const MipsSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// Mips::RA, return the number that it corresponds to (e.g. 31).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// Get PIC indirect call register
+ static unsigned getPICCallReg(void);
+
+ /// Adjust the Mips stack frame.
+ void adjustMipsStackFrame(MachineFunction &MF) const;
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ /// Stack Frame Processing Methods
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ /// Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ /// Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
new file mode 100644
index 000000000000..bbb275c66242
--- /dev/null
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -0,0 +1,252 @@
+//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MIPS register file
+//===----------------------------------------------------------------------===//
+
+// We have banks of 32 registers each.
+class MipsReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Mips";
+}
+
+// Mips CPU Registers
+class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+// Mips 32-bit FPU Registers
+class FPR<bits<5> num, string n> : MipsReg<n> {
+ let Num = num;
+}
+
+// Mips 64-bit (aliased) FPU Registers
+class AFPR<bits<5> num, string n, list<Register> aliases> : MipsReg<n> {
+ let Num = num;
+ let Aliases = aliases;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "Mips" in {
+
+ // General Purpose Registers
+ def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>;
+ def AT : MipsGPRReg< 1, "AT">, DwarfRegNum<[1]>;
+ def V0 : MipsGPRReg< 2, "2">, DwarfRegNum<[2]>;
+ def V1 : MipsGPRReg< 3, "3">, DwarfRegNum<[3]>;
+ def A0 : MipsGPRReg< 4, "4">, DwarfRegNum<[4]>;
+ def A1 : MipsGPRReg< 5, "5">, DwarfRegNum<[5]>;
+ def A2 : MipsGPRReg< 6, "6">, DwarfRegNum<[6]>;
+ def A3 : MipsGPRReg< 7, "7">, DwarfRegNum<[7]>;
+ def T0 : MipsGPRReg< 8, "8">, DwarfRegNum<[8]>;
+ def T1 : MipsGPRReg< 9, "9">, DwarfRegNum<[9]>;
+ def T2 : MipsGPRReg< 10, "10">, DwarfRegNum<[10]>;
+ def T3 : MipsGPRReg< 11, "11">, DwarfRegNum<[11]>;
+ def T4 : MipsGPRReg< 12, "12">, DwarfRegNum<[12]>;
+ def T5 : MipsGPRReg< 13, "13">, DwarfRegNum<[13]>;
+ def T6 : MipsGPRReg< 14, "14">, DwarfRegNum<[14]>;
+ def T7 : MipsGPRReg< 15, "15">, DwarfRegNum<[15]>;
+ def S0 : MipsGPRReg< 16, "16">, DwarfRegNum<[16]>;
+ def S1 : MipsGPRReg< 17, "17">, DwarfRegNum<[17]>;
+ def S2 : MipsGPRReg< 18, "18">, DwarfRegNum<[18]>;
+ def S3 : MipsGPRReg< 19, "19">, DwarfRegNum<[19]>;
+ def S4 : MipsGPRReg< 20, "20">, DwarfRegNum<[20]>;
+ def S5 : MipsGPRReg< 21, "21">, DwarfRegNum<[21]>;
+ def S6 : MipsGPRReg< 22, "22">, DwarfRegNum<[22]>;
+ def S7 : MipsGPRReg< 23, "23">, DwarfRegNum<[23]>;
+ def T8 : MipsGPRReg< 24, "24">, DwarfRegNum<[24]>;
+ def T9 : MipsGPRReg< 25, "25">, DwarfRegNum<[25]>;
+ def K0 : MipsGPRReg< 26, "26">, DwarfRegNum<[26]>;
+ def K1 : MipsGPRReg< 27, "27">, DwarfRegNum<[27]>;
+ def GP : MipsGPRReg< 28, "GP">, DwarfRegNum<[28]>;
+ def SP : MipsGPRReg< 29, "SP">, DwarfRegNum<[29]>;
+ def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<[30]>;
+ def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>;
+
+ /// Mips single precision FPU Registers
+ def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>;
+ def F1 : FPR< 1, "F1">, DwarfRegNum<[33]>;
+ def F2 : FPR< 2, "F2">, DwarfRegNum<[34]>;
+ def F3 : FPR< 3, "F3">, DwarfRegNum<[35]>;
+ def F4 : FPR< 4, "F4">, DwarfRegNum<[36]>;
+ def F5 : FPR< 5, "F5">, DwarfRegNum<[37]>;
+ def F6 : FPR< 6, "F6">, DwarfRegNum<[38]>;
+ def F7 : FPR< 7, "F7">, DwarfRegNum<[39]>;
+ def F8 : FPR< 8, "F8">, DwarfRegNum<[40]>;
+ def F9 : FPR< 9, "F9">, DwarfRegNum<[41]>;
+ def F10 : FPR<10, "F10">, DwarfRegNum<[42]>;
+ def F11 : FPR<11, "F11">, DwarfRegNum<[43]>;
+ def F12 : FPR<12, "F12">, DwarfRegNum<[44]>;
+ def F13 : FPR<13, "F13">, DwarfRegNum<[45]>;
+ def F14 : FPR<14, "F14">, DwarfRegNum<[46]>;
+ def F15 : FPR<15, "F15">, DwarfRegNum<[47]>;
+ def F16 : FPR<16, "F16">, DwarfRegNum<[48]>;
+ def F17 : FPR<17, "F17">, DwarfRegNum<[49]>;
+ def F18 : FPR<18, "F18">, DwarfRegNum<[50]>;
+ def F19 : FPR<19, "F19">, DwarfRegNum<[51]>;
+ def F20 : FPR<20, "F20">, DwarfRegNum<[52]>;
+ def F21 : FPR<21, "F21">, DwarfRegNum<[53]>;
+ def F22 : FPR<22, "F22">, DwarfRegNum<[54]>;
+ def F23 : FPR<23, "F23">, DwarfRegNum<[55]>;
+ def F24 : FPR<24, "F24">, DwarfRegNum<[56]>;
+ def F25 : FPR<25, "F25">, DwarfRegNum<[57]>;
+ def F26 : FPR<26, "F26">, DwarfRegNum<[58]>;
+ def F27 : FPR<27, "F27">, DwarfRegNum<[59]>;
+ def F28 : FPR<28, "F28">, DwarfRegNum<[60]>;
+ def F29 : FPR<29, "F29">, DwarfRegNum<[61]>;
+ def F30 : FPR<30, "F30">, DwarfRegNum<[62]>;
+ def F31 : FPR<31, "F31">, DwarfRegNum<[63]>;
+
+ /// Mips double precision FPU Registers (aliased
+ /// with the single precision registers to hold 64 bit values)
+ def D0 : AFPR< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>;
+ def D1 : AFPR< 2, "F2", [F2, F3]>, DwarfRegNum<[34]>;
+ def D2 : AFPR< 4, "F4", [F4, F5]>, DwarfRegNum<[36]>;
+ def D3 : AFPR< 6, "F6", [F6, F7]>, DwarfRegNum<[38]>;
+ def D4 : AFPR< 8, "F8", [F8, F9]>, DwarfRegNum<[40]>;
+ def D5 : AFPR<10, "F10", [F10, F11]>, DwarfRegNum<[42]>;
+ def D6 : AFPR<12, "F12", [F12, F13]>, DwarfRegNum<[44]>;
+ def D7 : AFPR<14, "F14", [F14, F15]>, DwarfRegNum<[46]>;
+ def D8 : AFPR<16, "F16", [F16, F17]>, DwarfRegNum<[48]>;
+ def D9 : AFPR<18, "F18", [F18, F19]>, DwarfRegNum<[50]>;
+ def D10 : AFPR<20, "F20", [F20, F21]>, DwarfRegNum<[52]>;
+ def D11 : AFPR<22, "F22", [F22, F23]>, DwarfRegNum<[54]>;
+ def D12 : AFPR<24, "F24", [F24, F25]>, DwarfRegNum<[56]>;
+ def D13 : AFPR<26, "F26", [F26, F27]>, DwarfRegNum<[58]>;
+ def D14 : AFPR<28, "F28", [F28, F29]>, DwarfRegNum<[60]>;
+ def D15 : AFPR<30, "F30", [F30, F31]>, DwarfRegNum<[62]>;
+
+ // Hi/Lo registers
+ def HI : Register<"hi">, DwarfRegNum<[64]>;
+ def LO : Register<"lo">, DwarfRegNum<[65]>;
+
+ // Status flags register
+ def FCR31 : Register<"31">;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+def CPURegs : RegisterClass<"Mips", [i32], 32,
+ // Return Values and Arguments
+ [V0, V1, A0, A1, A2, A3,
+ // Not preserved across procedure calls
+ T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ // Callee save
+ S0, S1, S2, S3, S4, S5, S6, S7,
+ // Reserved
+ ZERO, AT, K0, K1, GP, SP, FP, RA]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ CPURegsClass::iterator
+ CPURegsClass::allocation_order_end(const MachineFunction &MF) const {
+ // The last 8 registers on the list above are reserved
+ return end()-8;
+ }
+ }];
+}
+
+// 64bit fp:
+// * FGR64 - 32 64-bit registers
+// * AFGR64 - 16 32-bit even registers (32-bit FP Mode)
+//
+// 32bit fp:
+// * FGR32 - 16 32-bit even registers
+// * FGR32 - 32 32-bit registers (single float only mode)
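+// Note that in 32-bit FP mode only the even numbered single precision
+// registers are directly allocatable (see MIPS_SVR4_FGR32 below); the odd
+// ones are only used as parts of the AFGR64 even/odd pairs.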
+def FGR32 : RegisterClass<"Mips", [f32], 32,
+ // Return Values and Arguments
+ [F0, F1, F2, F3, F12, F13, F14, F15,
+ // Not preserved across procedure calls
+ F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19,
+ // Callee save
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
+ // Reserved
+ F31]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+
+ static const unsigned MIPS_FGR32[] = {
+ Mips::F0, Mips::F1, Mips::F2, Mips::F3, Mips::F12, Mips::F13,
+ Mips::F14, Mips::F15, Mips::F4, Mips::F5, Mips::F6, Mips::F7,
+ Mips::F8, Mips::F9, Mips::F10, Mips::F11, Mips::F16, Mips::F17,
+ Mips::F18, Mips::F19, Mips::F20, Mips::F21, Mips::F22, Mips::F23,
+ Mips::F24, Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29,
+ Mips::F30
+ };
+
+ static const unsigned MIPS_SVR4_FGR32[] = {
+ Mips::F0, Mips::F2, Mips::F12, Mips::F14, Mips::F4,
+ Mips::F6, Mips::F8, Mips::F10, Mips::F16, Mips::F18,
+ Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30,
+ };
+
+ FGR32Class::iterator
+ FGR32Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+
+ if (Subtarget.isSingleFloat())
+ return MIPS_FGR32;
+ else
+ return MIPS_SVR4_FGR32;
+ }
+
+ FGR32Class::iterator
+ FGR32Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+
+ if (Subtarget.isSingleFloat())
+ return MIPS_FGR32 + (sizeof(MIPS_FGR32) / sizeof(unsigned));
+ else
+ return MIPS_SVR4_FGR32 + (sizeof(MIPS_SVR4_FGR32) / sizeof(unsigned));
+ }
+ }];
+}
+
+def AFGR64 : RegisterClass<"Mips", [f64], 64,
+ // Return Values and Arguments
+ [D0, D1, D6, D7,
+ // Not preserved across procedure calls
+ D2, D3, D4, D5, D8, D9,
+ // Callee save
+ D10, D11, D12, D13, D14,
+ // Reserved
+ D15]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ AFGR64Class::iterator
+ AFGR64Class::allocation_order_end(const MachineFunction &MF) const {
+ // The last register on the list above is reserved
+ return end()-1;
+ }
+ }];
+}
+
+// Condition Register for floating point operations
+def CCR : RegisterClass<"Mips", [i32], 32, [FCR31]>;
+
+// Hi/Lo Registers
+def HILO : RegisterClass<"Mips", [i32], 32, [HI, LO]>;
+
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
new file mode 100644
index 000000000000..0c3ca57361cd
--- /dev/null
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -0,0 +1,63 @@
+//===- MipsSchedule.td - Mips Scheduling Definitions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across Mips chip sets. Based on GCC/Mips backend files.
+//===----------------------------------------------------------------------===//
+def ALU : FuncUnit;
+def IMULDIV : FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for Mips
+//===----------------------------------------------------------------------===//
+def IIAlu : InstrItinClass;
+def IILoad : InstrItinClass;
+def IIStore : InstrItinClass;
+def IIXfer : InstrItinClass;
+def IIBranch : InstrItinClass;
+def IIHiLo : InstrItinClass;
+def IIImul : InstrItinClass;
+def IIIdiv : InstrItinClass;
+def IIFcvt : InstrItinClass;
+def IIFmove : InstrItinClass;
+def IIFcmp : InstrItinClass;
+def IIFadd : InstrItinClass;
+def IIFmulSingle : InstrItinClass;
+def IIFmulDouble : InstrItinClass;
+def IIFdivSingle : InstrItinClass;
+def IIFdivDouble : InstrItinClass;
+def IIFsqrtSingle : InstrItinClass;
+def IIFsqrtDouble : InstrItinClass;
+def IIFrecipFsqrtStep : InstrItinClass;
+def IIPseudo : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Mips Generic instruction itineraries.
+//===----------------------------------------------------------------------===//
+def MipsGenericItineraries : ProcessorItineraries<[
+ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIXfer , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIHiLo , [InstrStage<1, [IMULDIV]>]>,
+ InstrItinData<IIImul , [InstrStage<17, [IMULDIV]>]>,
+ InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>,
+ InstrItinData<IIFcvt , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIFmove , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIFcmp , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIFadd , [InstrStage<4, [ALU]>]>,
+ InstrItinData<IIFmulSingle , [InstrStage<7, [ALU]>]>,
+ InstrItinData<IIFmulDouble , [InstrStage<8, [ALU]>]>,
+ InstrItinData<IIFdivSingle , [InstrStage<23, [ALU]>]>,
+ InstrItinData<IIFdivDouble , [InstrStage<36, [ALU]>]>,
+ InstrItinData<IIFsqrtSingle , [InstrStage<54, [ALU]>]>,
+ InstrItinData<IIFsqrtDouble , [InstrStage<12, [ALU]>]>,
+ InstrItinData<IIFrecipFsqrtStep , [InstrStage<5, [ALU]>]>
+]>;
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
new file mode 100644
index 000000000000..4245f274f8f0
--- /dev/null
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -0,0 +1,77 @@
+//===- MipsSubtarget.cpp - Mips Subtarget Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Mips specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsSubtarget.h"
+#include "Mips.h"
+#include "MipsGenSubtarget.inc"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool>
+NotABICall("disable-mips-abicall", cl::Hidden,
+ cl::desc("Disable code for SVR4-style dynamic objects"));
+static cl::opt<bool>
+AbsoluteCall("enable-mips-absolute-call", cl::Hidden,
+ cl::desc("Enable absolute call within abicall"));
+static cl::opt<unsigned>
+SSThreshold("mips-ssection-threshold", cl::Hidden,
+ cl::desc("Small data and bss section threshold size (default=8)"),
+ cl::init(8));
+
+MipsSubtarget::MipsSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS, bool little) :
+ MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false),
+ IsFP64bit(false), IsGP64bit(false), HasVFPU(false), HasABICall(true),
+ HasAbsoluteCall(false), IsLinux(true), HasSEInReg(false), HasCondMov(false),
+ HasMulDivAdd(false), HasMinMax(false), HasSwap(false), HasBitCount(false)
+{
+ std::string CPU = "mips1";
+ MipsArchVersion = Mips1;
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+ const std::string& TT = M.getTargetTriple();
+
+ // Small section size threshold
+ SSectionThreshold = SSThreshold;
+
+ // Is the target system Linux ?
+ if (TT.find("linux") == std::string::npos)
+ IsLinux = false;
+
+ // When only the target triple is specified and it is
+ // an allegrex target, set the features. We also match
+ // big and little endian allegrex cores (don't really
+ // know if a big endian one exists)
+ if (TT.find("mipsallegrex") != std::string::npos ||
+ TT.find("psp") != std::string::npos) {
+ MipsABI = EABI;
+ IsSingleFloat = true;
+ MipsArchVersion = Mips2;
+ HasVFPU = true; // Enables Allegrex Vector FPU (not supported yet)
+ HasSEInReg = true;
+ HasBitCount = true;
+ HasSwap = true;
+ HasCondMov = true;
+ }
+
+ // Abicall is the default for O32 ABI, but is disabled within EABI and in
+ // static code.
+ if (NotABICall || isABI_EABI() || (TM.getRelocationModel() == Reloc::Static))
+ HasABICall = false;
+
+ // TODO: disable when handling 64 bit symbols in the future.
+ if (HasABICall && AbsoluteCall)
+ HasAbsoluteCall = true;
+}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
new file mode 100644
index 000000000000..61c37c1d377e
--- /dev/null
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -0,0 +1,139 @@
+//=====-- MipsSubtarget.h - Define Subtarget for the Mips -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSUBTARGET_H
+#define MIPSSUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+class MipsSubtarget : public TargetSubtarget {
+
+public:
+ enum MipsABIEnum {
+ O32, O64, N32, N64, EABI
+ };
+
+protected:
+
+ enum MipsArchEnum {
+ Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2, Mips64, Mips64r2
+ };
+
+ // Mips architecture version
+ MipsArchEnum MipsArchVersion;
+
+ // Mips supported ABIs
+ MipsABIEnum MipsABI;
+
+ // IsLittle - The target is Little Endian
+ bool IsLittle;
+
+ // IsSingleFloat - The target only supports single precision floating
+ // point operations. This enables the target to use all 32 32-bit
+ // floating point registers instead of only the even ones.
+ bool IsSingleFloat;
+
+ // IsFP64bit - The target processor has 64-bit floating point registers.
+ bool IsFP64bit;
+
+ // IsGP64bit - General-purpose registers are 64 bits wide
+ bool IsGP64bit;
+
+ // HasVFPU - Processor has a vector floating point unit.
+ bool HasVFPU;
+
+ // HasABICall - Enable SVR4 code for SVR4-style dynamic objects
+ bool HasABICall;
+
+ // HasAbsoluteCall - Enable code that is not fully position-independent.
+ // Only works with HasABICall enabled.
+ bool HasAbsoluteCall;
+
+ // IsLinux - Target system is Linux. If false, we assume a generic ELF OS for now.
+ bool IsLinux;
+
+ // Put global and static items less than or equal to SSectionThreshold
+ // bytes into the small data or bss section. The default is 8.
+ unsigned SSectionThreshold;
+
+ /// Features related to the presence of specific instructions.
+
+ // HasSEInReg - SEB and SEH (signext in register) instructions.
+ bool HasSEInReg;
+
+ // HasCondMov - Conditional mov (MOVZ, MOVN) instructions.
+ bool HasCondMov;
+
+ // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu)
+ // instructions.
+ bool HasMulDivAdd;
+
+ // HasMinMax - MIN and MAX instructions.
+ bool HasMinMax;
+
+ // HasSwap - Byte and half swap instructions.
+ bool HasSwap;
+
+ // HasBitCount - Count leading '1' and '0' bits.
+ bool HasBitCount;
+
+ InstrItineraryData InstrItins;
+
+public:
+
+ /// Only O32 and EABI supported right now.
+ bool isABI_EABI() const { return MipsABI == EABI; }
+ bool isABI_O32() const { return MipsABI == O32; }
+ unsigned getTargetABI() const { return MipsABI; }
+
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ MipsSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS, bool little);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ bool isMips1() const { return MipsArchVersion == Mips1; }
+
+ bool isLittle() const { return IsLittle; }
+ bool isFP64bit() const { return IsFP64bit; }
+ bool isGP64bit() const { return IsGP64bit; }
+ bool isGP32bit() const { return !IsGP64bit; }
+ bool isSingleFloat() const { return IsSingleFloat; }
+ bool isNotSingleFloat() const { return !IsSingleFloat; }
+ bool hasVFPU() const { return HasVFPU; }
+ bool hasABICall() const { return HasABICall; }
+ bool hasAbsoluteCall() const { return HasAbsoluteCall; }
+ bool isLinux() const { return IsLinux; }
+ unsigned getSSectionThreshold() const { return SSectionThreshold; }
+
+ /// Features related to the presence of specific instructions.
+ bool hasSEInReg() const { return HasSEInReg; }
+ bool hasCondMov() const { return HasCondMov; }
+ bool hasMulDivAdd() const { return HasMulDivAdd; }
+ bool hasMinMax() const { return HasMinMax; }
+ bool hasSwap() const { return HasSwap; }
+ bool hasBitCount() const { return HasBitCount; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Mips/MipsTargetAsmInfo.cpp b/lib/Target/Mips/MipsTargetAsmInfo.cpp
new file mode 100644
index 000000000000..c197b0c2981c
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetAsmInfo.cpp
@@ -0,0 +1,98 @@
+//===-- MipsTargetAsmInfo.cpp - Mips asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MipsTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetAsmInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/GlobalVariable.h"
+
+using namespace llvm;
+
+MipsTargetAsmInfo::MipsTargetAsmInfo(const MipsTargetMachine &TM):
+ ELFTargetAsmInfo(TM) {
+
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+
+ AlignmentIsInBytes = false;
+ COMMDirectiveTakesAlignment = true;
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = NULL;
+ PrivateGlobalPrefix = "$";
+ JumpTableDataSection = "\t.rdata";
+ CommentString = "#";
+ ZeroDirective = "\t.space\t";
+ BSSSection = "\t.section\t.bss";
+ CStringSection = ".rodata.str";
+
+ if (!Subtarget->hasABICall()) {
+ JumpTableDirective = "\t.word\t";
+ SmallDataSection = getNamedSection("\t.sdata", SectionFlags::Writeable);
+ SmallBSSSection = getNamedSection("\t.sbss",
+ SectionFlags::Writeable |
+ SectionFlags::BSS);
+ } else
+ JumpTableDirective = "\t.gpword\t";
+
+}
+
+unsigned MipsTargetAsmInfo::
+SectionFlagsForGlobal(const GlobalValue *GV, const char* Name) const {
+ unsigned Flags = ELFTargetAsmInfo::SectionFlagsForGlobal(GV, Name);
+ // Mask out the Small Section flag bit; Mips doesn't support the 's' section
+ // symbol for its small sections.
+ return (Flags & (~SectionFlags::Small));
+}
+
+SectionKind::Kind MipsTargetAsmInfo::
+SectionKindForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind K = ELFTargetAsmInfo::SectionKindForGlobal(GV);
+
+ if (Subtarget->hasABICall())
+ return K;
+
+ if (K != SectionKind::Data && K != SectionKind::BSS &&
+ K != SectionKind::RODataMergeConst)
+ return K;
+
+ if (isa<GlobalVariable>(GV)) {
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+ unsigned Threshold = Subtarget->getSSectionThreshold();
+
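+ // Globals whose size does not exceed the -mips-ssection-threshold value
+ // (8 bytes by default) are redirected to the small data/bss sections.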
+ if (Size > 0 && Size <= Threshold) {
+ if (K == SectionKind::BSS)
+ return SectionKind::SmallBSS;
+ else
+ return SectionKind::SmallData;
+ }
+ }
+
+ return K;
+}
+
+const Section* MipsTargetAsmInfo::
+SelectSectionForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind K = SectionKindForGlobal(GV);
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+
+ if (GVA && (!GVA->isWeakForLinker()))
+ switch (K) {
+ case SectionKind::SmallData:
+ return getSmallDataSection();
+ case SectionKind::SmallBSS:
+ return getSmallBSSSection();
+ default: break;
+ }
+
+ return ELFTargetAsmInfo::SelectSectionForGlobal(GV);
+}
diff --git a/lib/Target/Mips/MipsTargetAsmInfo.h b/lib/Target/Mips/MipsTargetAsmInfo.h
new file mode 100644
index 000000000000..2b5a739e7219
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetAsmInfo.h
@@ -0,0 +1,51 @@
+//=====-- MipsTargetAsmInfo.h - Mips asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MipsTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETASMINFO_H
+#define MIPSTARGETASMINFO_H
+
+#include "MipsSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class GlobalValue;
+ class MipsTargetMachine;
+
+ struct MipsTargetAsmInfo : public ELFTargetAsmInfo {
+ explicit MipsTargetAsmInfo(const MipsTargetMachine &TM);
+
+ /// SectionKindForGlobal - This hook allows the target to select the proper
+ /// section kind used for global emission.
+ virtual SectionKind::Kind
+ SectionKindForGlobal(const GlobalValue *GV) const;
+
+ /// SectionFlagsForGlobal - This hook allows the target to select the proper
+ /// section flags either for a given global or for a section.
+ virtual unsigned
+ SectionFlagsForGlobal(const GlobalValue *GV = NULL,
+ const char* name = NULL) const;
+
+ virtual const Section* SelectSectionForGlobal(const GlobalValue *GV) const;
+
+ private:
+ const MipsSubtarget *Subtarget;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
new file mode 100644
index 000000000000..ef524e3ecd72
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -0,0 +1,133 @@
+//===-- MipsTargetMachine.cpp - Define TargetMachine for Mips -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about Mips target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsTargetAsmInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+/// MipsTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int MipsTargetMachineModule;
+int MipsTargetMachineModule = 0;
+
+// Register the target.
+static RegisterTarget<MipsTargetMachine> X("mips", "Mips");
+static RegisterTarget<MipselTargetMachine> Y("mipsel", "Mipsel");
+
+const TargetAsmInfo *MipsTargetMachine::
+createTargetAsmInfo() const
+{
+ return new MipsTargetAsmInfo(*this);
+}
+
+// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
+// The stack is always 8 byte aligned
+// On function prologue, the stack is created by decrementing
+// its pointer. Once decremented, all references are done with positive
+// offsets from the stack/frame pointer, so using StackGrowsUp makes
+// the handling easier.
+// Using CodeModel::Large enables different CALL behavior.
+MipsTargetMachine::
+MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle=false):
+ Subtarget(*this, M, FS, isLittle),
+ DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32") :
+ std::string("E-p:32:32:32-i8:8:32-i16:16:32")),
+ InstrInfo(*this),
+ FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
+ TLInfo(*this)
+{
+ // Abicall enables PIC by default
+ if (Subtarget.hasABICall())
+ setRelocationModel(Reloc::PIC_);
+
+ // TODO: create an option to enable long calls, like -mlong-calls,
+ // that would be our CodeModel::Large. It must not work with Abicall.
+ if (getCodeModel() == CodeModel::Default)
+ setCodeModel(CodeModel::Small);
+}
+
+MipselTargetMachine::
+MipselTargetMachine(const Module &M, const std::string &FS) :
+ MipsTargetMachine(M, FS, true) {}
+
+// Return 20 for a mips triple match; otherwise return 0, in which case
+// -march must be specified to generate MIPS code.
+unsigned MipsTargetMachine::
+getModuleMatchQuality(const Module &M)
+{
+ // We strongly match "mips*-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 5 && std::string(TT.begin(), TT.begin()+5) == "mips-")
+ return 20;
+
+ if (TT.size() >= 13 && std::string(TT.begin(),
+ TT.begin()+13) == "mipsallegrex-")
+ return 20;
+
+ return 0;
+}
+
+// Return 20 for a mipsel triple match; otherwise return 0, in which case
+// -march must be specified to generate MIPSEL code.
+unsigned MipselTargetMachine::
+getModuleMatchQuality(const Module &M)
+{
+ // We strongly match "mips*el-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 7 && std::string(TT.begin(), TT.begin()+7) == "mipsel-")
+ return 20;
+
+ if (TT.size() >= 15 && std::string(TT.begin(),
+ TT.begin()+15) == "mipsallegrexel-")
+ return 20;
+
+ if (TT.size() == 3 && std::string(TT.begin(), TT.begin()+3) == "psp")
+ return 20;
+
+ return 0;
+}
+
+// Install an instruction selector pass using
+// the ISelDag to generate Mips code.
+bool MipsTargetMachine::
+addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+ PM.add(createMipsISelDag(*this));
+ return false;
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. Return true if -print-machineinstrs should
+// print out the code after these passes.
+bool MipsTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+ PM.add(createMipsDelaySlotFillerPass(*this));
+ return true;
+}
+
+// Implements the AssemblyEmitter for the target. Must return
+// true if AssemblyEmitter is supported
+bool MipsTargetMachine::
+addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out)
+{
+ // Output assembly language.
+ PM.add(createMipsCodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
new file mode 100644
index 000000000000..a9e1df27ae7f
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -0,0 +1,80 @@
+//===-- MipsTargetMachine.h - Define TargetMachine for Mips -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Mips specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETMACHINE_H
+#define MIPSTARGETMACHINE_H
+
+#include "MipsSubtarget.h"
+#include "MipsInstrInfo.h"
+#include "MipsISelLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+
+namespace llvm {
+ class raw_ostream;
+
+ class MipsTargetMachine : public LLVMTargetMachine {
+ MipsSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MipsInstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+ MipsTargetLowering TLInfo;
+
+ protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+ public:
+ MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle);
+
+ virtual const MipsInstrInfo *getInstrInfo() const
+ { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const
+ { return &FrameInfo; }
+ virtual const MipsSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+
+ virtual const MipsRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual MipsTargetLowering *getTargetLowering() const {
+ return const_cast<MipsTargetLowering*>(&TLInfo);
+ }
+
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+ };
+
+/// MipselTargetMachine - Mipsel target machine.
+///
+class MipselTargetMachine : public MipsTargetMachine {
+public:
+ MipselTargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt
new file mode 100644
index 000000000000..00d737af4c2e
--- /dev/null
+++ b/lib/Target/PIC16/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(LLVM_TARGET_DEFINITIONS PIC16.td)
+
+tablegen(PIC16GenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(PIC16GenRegisterNames.inc -gen-register-enums)
+tablegen(PIC16GenRegisterInfo.inc -gen-register-desc)
+tablegen(PIC16GenInstrNames.inc -gen-instr-enums)
+tablegen(PIC16GenInstrInfo.inc -gen-instr-desc)
+tablegen(PIC16GenAsmWriter.inc -gen-asm-writer)
+tablegen(PIC16GenDAGISel.inc -gen-dag-isel)
+tablegen(PIC16GenCallingConv.inc -gen-callingconv)
+tablegen(PIC16GenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(PIC16
+ PIC16AsmPrinter.cpp
+ PIC16DebugInfo.cpp
+ PIC16InstrInfo.cpp
+ PIC16ISelDAGToDAG.cpp
+ PIC16ISelLowering.cpp
+ PIC16MemSelOpt.cpp
+ PIC16RegisterInfo.cpp
+ PIC16Subtarget.cpp
+ PIC16TargetAsmInfo.cpp
+ PIC16TargetMachine.cpp
+ )
diff --git a/lib/Target/PIC16/Makefile b/lib/Target/PIC16/Makefile
new file mode 100644
index 000000000000..c429324cc2d1
--- /dev/null
+++ b/lib/Target/PIC16/Makefile
@@ -0,0 +1,21 @@
+##===- lib/Target/PIC16/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMPIC16
+TARGET = PIC16
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = PIC16GenRegisterInfo.h.inc PIC16GenRegisterNames.inc \
+ PIC16GenRegisterInfo.inc PIC16GenInstrNames.inc \
+ PIC16GenInstrInfo.inc PIC16GenAsmWriter.inc \
+ PIC16GenDAGISel.inc PIC16GenCallingConv.inc \
+ PIC16GenSubtarget.inc
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h
new file mode 100644
index 000000000000..40bed2f50e10
--- /dev/null
+++ b/lib/Target/PIC16/PIC16.h
@@ -0,0 +1,345 @@
+//===-- PIC16.h - Top-level interface for PIC16 representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM PIC16 back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_PIC16_H
+#define LLVM_TARGET_PIC16_H
+
+#include "llvm/Target/TargetMachine.h"
+#include <iosfwd>
+#include <cassert>
+#include <sstream>
+#include <cstring>
+#include <string>
+
+namespace llvm {
+ class PIC16TargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class raw_ostream;
+
+namespace PIC16CC {
+ enum CondCodes {
+ EQ,
+ NE,
+ LT,
+ LE,
+ GT,
+ GE,
+ ULT,
+ UGT,
+ ULE,
+ UGE
+ };
+}
+ // A Central class to manage all ABI naming conventions.
+ // PAN - [P]ic16 [A]BI [N]ames
+ class PAN {
+ public:
+ // Map the name of the symbol to its section name.
+ // Current ABI:
+ // -----------------------------------------------------
+ // ALL Names are prefixed with the symbol '@'.
+ // ------------------------------------------------------
+ // Global variables do not have any '.' in their names.
+ // These are mainly function names and global variable names.
+ // Example - @foo, @i
+ // -------------------------------------------------------
+ // Functions and auto variables.
+ // Names are mangled as <prefix><funcname>.<tag>.<varname>
+ // Where <prefix> is '@' and <tag> is any one of
+ // the following
+ // .auto. - an automatic var of a function.
+  //  .temp. - temporary data of a function.
+ // .ret. - return value label for a function.
+ // .frame. - Frame label for a function where retval, args
+ // and temps are stored.
+ // .args. - Label used to pass arguments to a direct call.
+ // Example - Function name: @foo
+ // Its frame: @foo.frame.
+ // Its retval: @foo.ret.
+ // Its local vars: @foo.auto.a
+ // Its temp data: @foo.temp.
+ // Its arg passing: @foo.args.
+ //----------------------------------------------
+ // Libcall - compiler generated libcall names must start with .lib.
+ // This id will be used to emit extern decls for libcalls.
+ // Example - libcall name: @.lib.sra.i8
+ // To pass args: @.lib.sra.i8.args.
+ // To return val: @.lib.sra.i8.ret.
+ //----------------------------------------------
+ // SECTION Names
+ // uninitialized globals - @udata.<num>.#
+ // initialized globals - @idata.<num>.#
+ // Function frame - @<func>.frame_section.
+ // Function autos - @<func>.autos_section.
+ // Declarations - @section.0
+ //----------------------------------------------------------
+
+ // Tags used to mangle different names.
+ enum TAGS {
+ PREFIX_SYMBOL,
+ GLOBAL,
+ STATIC_LOCAL,
+ AUTOS_LABEL,
+ FRAME_LABEL,
+ RET_LABEL,
+ ARGS_LABEL,
+ TEMPS_LABEL,
+
+ LIBCALL,
+
+ FRAME_SECTION,
+ AUTOS_SECTION,
+ CODE_SECTION
+ };
+
+ // Textual names of the tags.
+ inline static const char *getTagName(TAGS tag) {
+ switch (tag) {
+ default: return "";
+ case PREFIX_SYMBOL: return "@";
+ case AUTOS_LABEL: return ".auto.";
+ case FRAME_LABEL: return ".frame.";
+ case TEMPS_LABEL: return ".temp.";
+ case ARGS_LABEL: return ".args.";
+ case RET_LABEL: return ".ret.";
+ case LIBCALL: return ".lib.";
+ case FRAME_SECTION: return ".frame_section.";
+ case AUTOS_SECTION: return ".autos_section.";
+ case CODE_SECTION: return ".code_section.";
+ }
+ }
+
+ // Get tag type for the Symbol.
+ inline static TAGS getSymbolTag(const std::string &Sym) {
+ if (Sym.find(getTagName(TEMPS_LABEL)) != std::string::npos)
+ return TEMPS_LABEL;
+
+ if (Sym.find(getTagName(FRAME_LABEL)) != std::string::npos)
+ return FRAME_LABEL;
+
+ if (Sym.find(getTagName(RET_LABEL)) != std::string::npos)
+ return RET_LABEL;
+
+ if (Sym.find(getTagName(ARGS_LABEL)) != std::string::npos)
+ return ARGS_LABEL;
+
+ if (Sym.find(getTagName(AUTOS_LABEL)) != std::string::npos)
+ return AUTOS_LABEL;
+
+ if (Sym.find(getTagName(LIBCALL)) != std::string::npos)
+ return LIBCALL;
+
+    // It does not have any Tag. So it's a true global or static local.
+ if (Sym.find(".") == std::string::npos)
+ return GLOBAL;
+
+ // If a . is there, then it may be static local.
+ // We should mangle these as well in clang.
+ if (Sym.find(".") != std::string::npos)
+ return STATIC_LOCAL;
+
+ assert (0 && "Could not determine Symbol's tag");
+ }
+
+ // addPrefix - add prefix symbol to a name if there isn't one already.
+ inline static std::string addPrefix (const std::string &Name) {
+ std::string prefix = getTagName (PREFIX_SYMBOL);
+
+ // If this name already has a prefix, nothing to do.
+ if (Name.compare(0, prefix.size(), prefix) == 0)
+ return Name;
+
+ return prefix + Name;
+ }
+
+ // Get mangled func name from a mangled sym name.
+ // In all cases func name is the first component before a '.'.
+ static inline std::string getFuncNameForSym(const std::string &Sym1) {
+    assert (getSymbolTag(Sym1) != GLOBAL && "does not belong to a function");
+
+ std::string Sym = addPrefix(Sym1);
+
+ // Position of the . after func name. That's where func name ends.
+ size_t func_name_end = Sym.find ('.');
+
+ return Sym.substr (0, func_name_end);
+ }
+
+ // Get Frame start label for a func.
+ static std::string getFrameLabel(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(FRAME_LABEL);
+ return Func1 + tag;
+ }
+
+ static std::string getRetvalLabel(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(RET_LABEL);
+ return Func1 + tag;
+ }
+
+ static std::string getArgsLabel(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(ARGS_LABEL);
+ return Func1 + tag;
+ }
+
+ static std::string getTempdataLabel(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(TEMPS_LABEL);
+ return Func1 + tag;
+ }
+
+ static std::string getFrameSectionName(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(FRAME_SECTION);
+ return Func1 + tag + "# UDATA_OVR";
+ }
+
+ static std::string getAutosSectionName(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(AUTOS_SECTION);
+ return Func1 + tag + "# UDATA_OVR";
+ }
+
+ static std::string getCodeSectionName(const std::string &Func) {
+ std::string Func1 = addPrefix(Func);
+ std::string tag = getTagName(CODE_SECTION);
+ return Func1 + tag + "# CODE";
+ }
+
+ // udata and idata section names are generated by a given number.
+ // @udata.<num>.#
+ static std::string getUdataSectionName(unsigned num) {
+ std::ostringstream o;
+ o << getTagName(PREFIX_SYMBOL) << "udata." << num << ".# UDATA";
+ return o.str();
+ }
+
+ static std::string getIdataSectionName(unsigned num) {
+ std::ostringstream o;
+ o << getTagName(PREFIX_SYMBOL) << "idata." << num << ".# IDATA";
+ return o.str();
+ }
+
+ inline static bool isLocalName (const std::string &Name) {
+ if (getSymbolTag(Name) == AUTOS_LABEL)
+ return true;
+
+ return false;
+ }
+
+ inline static bool isLocalToFunc (std::string &Func, std::string &Var) {
+ if (! isLocalName(Var)) return false;
+
+ std::string Func1 = addPrefix(Func);
+    // Extract the func name of the variable.
+ const std::string &fname = getFuncNameForSym(Var);
+
+ if (fname.compare(Func1) == 0)
+ return true;
+
+ return false;
+ }
+
+
+ // Get the section for the given external symbol names.
+ // This tries to find the type (Tag) of the symbol from its mangled name
+  // and returns the appropriate section name for it.
+ static inline std::string getSectionNameForSym(const std::string &Sym1) {
+ std::string Sym = addPrefix(Sym1);
+
+ std::string SectionName;
+
+ std::string Fname = getFuncNameForSym (Sym);
+ TAGS id = getSymbolTag (Sym);
+
+ switch (id) {
+ default : assert (0 && "Could not determine external symbol type");
+ case FRAME_LABEL:
+ case RET_LABEL:
+ case TEMPS_LABEL:
+ case ARGS_LABEL: {
+ return getFrameSectionName(Fname);
+ }
+ case AUTOS_LABEL: {
+ return getAutosSectionName(Fname);
+ }
+ }
+ }
+ }; // class PAN.
+
+
+  // External symbol names require memory that lives until the end of the
+  // program, so we allocate it on the heap and keep it around.
+ inline static const char *createESName (const std::string &name) {
+ char *tmpName = new char[name.size() + 1];
+ strcpy (tmpName, name.c_str());
+ return tmpName;
+ }
+
+
+
+ inline static const char *PIC16CondCodeToString(PIC16CC::CondCodes CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case PIC16CC::NE: return "ne";
+ case PIC16CC::EQ: return "eq";
+ case PIC16CC::LT: return "lt";
+ case PIC16CC::ULT: return "lt";
+ case PIC16CC::LE: return "le";
+ case PIC16CC::GT: return "gt";
+ case PIC16CC::UGT: return "gt";
+ case PIC16CC::GE: return "ge";
+ }
+ }
+
+ inline static bool isSignedComparison(PIC16CC::CondCodes CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case PIC16CC::NE:
+ case PIC16CC::EQ:
+ case PIC16CC::LT:
+ case PIC16CC::LE:
+ case PIC16CC::GE:
+ case PIC16CC::GT:
+ return true;
+ case PIC16CC::ULT:
+ case PIC16CC::UGT:
+ case PIC16CC::ULE:
+ case PIC16CC::UGE:
+ return false; // condition codes for unsigned comparison.
+ }
+ }
+
+
+
+ FunctionPass *createPIC16ISelDag(PIC16TargetMachine &TM);
+ FunctionPass *createPIC16CodePrinterPass(raw_ostream &OS,
+ PIC16TargetMachine &TM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+  // Banksel optimizer pass.
+ FunctionPass *createPIC16MemSelOptimizerPass();
+} // end namespace llvm;
+
+// Defines symbolic names for PIC16 registers. This defines a mapping from
+// register name to register number.
+#include "PIC16GenRegisterNames.inc"
+
+// Defines symbolic names for the PIC16 instructions.
+#include "PIC16GenInstrNames.inc"
+
+#endif
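For reference, a minimal sketch (not part of the patch) of how the PAN helpers above compose names. It assumes it is built inside the PIC16 backend tree so that PIC16.h and its generated .inc headers resolve; the expected strings follow directly from the tag table and section-name helpers defined in this header.

    // pan_names_example.cpp - illustrative only, not part of the backend.
    #include "PIC16.h"
    #include <iostream>

    int main() {
      using namespace llvm;
      std::cout << PAN::getFrameLabel("foo")   << "\n";  // @foo.frame.
      std::cout << PAN::getRetvalLabel("foo")  << "\n";  // @foo.ret.
      std::cout << PAN::getArgsLabel("foo")    << "\n";  // @foo.args.
      std::cout << PAN::getUdataSectionName(1) << "\n";  // @udata.1.# UDATA
      // An auto variable of foo maps back to foo's autos section:
      // @foo.autos_section.# UDATA_OVR
      std::cout << PAN::getSectionNameForSym("@foo.auto.a") << "\n";
      return 0;
    }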
diff --git a/lib/Target/PIC16/PIC16.td b/lib/Target/PIC16/PIC16.td
new file mode 100644
index 000000000000..b2b9b1cd171e
--- /dev/null
+++ b/lib/Target/PIC16/PIC16.td
@@ -0,0 +1,40 @@
+//===- PIC16.td - Describe the PIC16 Target Machine -----------*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the PIC16 target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+include "PIC16RegisterInfo.td"
+include "PIC16InstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+//===----------------------------------------------------------------------===//
+def FeatureCooper : SubtargetFeature<"cooper", "IsCooper", "true",
+ "PIC16 Cooper ISA Support">;
+
+//===----------------------------------------------------------------------===//
+// PIC16 supported processors.
+//===----------------------------------------------------------------------===//
+
+def : Processor<"generic", NoItineraries, []>;
+def : Processor<"cooper", NoItineraries, [FeatureCooper]>;
+
+
+def PIC16InstrInfo : InstrInfo {}
+
+def PIC16 : Target {
+ let InstructionSet = PIC16InstrInfo;
+}
+
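A brief usage note, with the caveat that the registered target name lives in PIC16TargetMachine.cpp rather than in this file: assuming the backend registers itself under the name "pic16", the cooper processor defined above would be selected with llc's standard flags, e.g. "llc -march=pic16 -mcpu=cooper input.bc"; omitting -mcpu picks the generic processor.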
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp
new file mode 100644
index 000000000000..ef3bc4b52a96
--- /dev/null
+++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp
@@ -0,0 +1,404 @@
+//===-- PIC16AsmPrinter.cpp - PIC16 LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PIC16 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16AsmPrinter.h"
+#include "PIC16TargetAsmInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+using namespace llvm;
+
+#include "PIC16GenAsmWriter.inc"
+
+bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ printInstruction(MI);
+ return true;
+}
+
+/// runOnMachineFunction - This uses the printInstruction()
+/// method to print assembly for each instruction.
+///
+bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ // This calls the base class function required to be called at beginning
+ // of runOnMachineFunction.
+ SetupMachineFunction(MF);
+
+ // Get the mangled name.
+ const Function *F = MF.getFunction();
+ CurrentFnName = Mang->getValueName(F);
+
+ // Emit the function variables.
+ EmitFunctionFrame(MF);
+
+ // Emit function begin debug directives
+ DbgInfo.EmitFunctBeginDI(F);
+
+ EmitAutos(CurrentFnName);
+  std::string codeSection = PAN::getCodeSectionName(CurrentFnName);
+
+  const Section *fCodeSection = TAI->getNamedSection(codeSection.c_str(),
+                                                     SectionFlags::Code);
+ O << "\n";
+ // Start the Code Section.
+ SwitchToSection (fCodeSection);
+
+ // Emit the frame address of the function at the beginning of code.
+ O << "\tretlw low(" << PAN::getFrameLabel(CurrentFnName) << ")\n";
+ O << "\tretlw high(" << PAN::getFrameLabel(CurrentFnName) << ")\n";
+
+ // Emit function start label.
+ O << CurrentFnName << ":\n";
+
+  // For emitting line directives, we need to keep track of the current
+  // source line; emit the line directive only when it changes.
+ unsigned CurLine = 0;
+ O << "\n";
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true);
+ O << '\n';
+ }
+
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Emit the line directive if source line changed.
+ const DebugLoc DL = II->getDebugLoc();
+ if (!DL.isUnknown()) {
+ unsigned line = MF.getDebugLocTuple(DL).Line;
+ if (line != CurLine) {
+ O << "\t.line " << line << "\n";
+ CurLine = line;
+ }
+ }
+
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ // Emit function end debug directives.
+ DbgInfo.EmitFunctEndDI(F, CurLine);
+ return false; // we didn't modify anything.
+}
+
+/// createPIC16CodePrinterPass - Returns a pass that prints the PIC16
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createPIC16CodePrinterPass(raw_ostream &o,
+ PIC16TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new PIC16AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+
+// printOperand - print operand of insn.
+void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ else
+ assert(0 && "not implemented");
+ return;
+
+ case MachineOperand::MO_Immediate:
+ O << (int)MO.getImm();
+ return;
+
+ case MachineOperand::MO_GlobalAddress: {
+ O << Mang->getValueName(MO.getGlobal());
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ const char *Sname = MO.getSymbolName();
+
+    // If it's a libcall name, record it in the decls section.
+ if (PAN::getSymbolTag(Sname) == PAN::LIBCALL) {
+ LibcallDecls.push_back(Sname);
+ }
+
+ O << Sname;
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+
+ default:
+ assert(0 && " Operand type not supported.");
+ }
+}
+
+void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
+ int CC = (int)MI->getOperand(opNum).getImm();
+ O << PIC16CondCodeToString((PIC16CC::CondCodes)CC);
+}
+
+void PIC16AsmPrinter::printLibcallDecls(void) {
+ // If no libcalls used, return.
+ if (LibcallDecls.empty()) return;
+
+ O << TAI->getCommentString() << "External decls for libcalls - BEGIN." <<"\n";
+ // Remove duplicate entries.
+ LibcallDecls.sort();
+ LibcallDecls.unique();
+ for (std::list<const char*>::const_iterator I = LibcallDecls.begin();
+ I != LibcallDecls.end(); I++) {
+ O << TAI->getExternDirective() << *I << "\n";
+ O << TAI->getExternDirective() << PAN::getArgsLabel(*I) << "\n";
+ O << TAI->getExternDirective() << PAN::getRetvalLabel(*I) << "\n";
+ }
+ O << TAI->getCommentString() << "External decls for libcalls - END." <<"\n";
+}
+
+bool PIC16AsmPrinter::doInitialization (Module &M) {
+ bool Result = AsmPrinter::doInitialization(M);
+ DbgInfo.EmitFileDirective(M);
+
+  // FIXME: This is a temporary solution to generate the include file.
+  // The processor should be passed to llc as an input and the header file
+  // should be generated accordingly.
+ O << "\n\t#include P16F1937.INC\n";
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMI);
+ DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+ assert(DW && "Dwarf Writer is not available");
+ DW->BeginModule(&M, MMI, O, this, TAI);
+
+ // Set the section names for all globals.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ I->setSection(TAI->SectionForGlobal(I)->getName());
+ }
+
+ EmitFunctionDecls(M);
+ EmitUndefinedVars(M);
+ EmitDefinedVars(M);
+ EmitIData(M);
+ EmitUData(M);
+ EmitRomData(M);
+ DbgInfo.PopulateFunctsDI(M);
+ return Result;
+}
+
+// Emit extern decls for functions imported from other modules, and emit
+// global declarations for functions defined in this module that are
+// available to other modules.
+void PIC16AsmPrinter::EmitFunctionDecls (Module &M) {
+ // Emit declarations for external functions.
+ O << TAI->getCommentString() << "Function Declarations - BEGIN." <<"\n";
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; I++) {
+ std::string Name = Mang->getValueName(I);
+ if (Name.compare("@abort") == 0)
+ continue;
+
+    // If it is an llvm intrinsic call then don't emit it.
+ if (Name.find("llvm.") != std::string::npos)
+ continue;
+
+ if (! (I->isDeclaration() || I->hasExternalLinkage()))
+ continue;
+
+ const char *directive = I->isDeclaration() ? TAI->getExternDirective() :
+ TAI->getGlobalDirective();
+
+ O << directive << Name << "\n";
+ O << directive << PAN::getRetvalLabel(Name) << "\n";
+ O << directive << PAN::getArgsLabel(Name) << "\n";
+ }
+
+ O << TAI->getCommentString() << "Function Declarations - END." <<"\n";
+}
+
+// Emit variables imported from other Modules.
+void PIC16AsmPrinter::EmitUndefinedVars (Module &M)
+{
+ std::vector<const GlobalVariable*> Items = PTAI->ExternalVarDecls->Items;
+ if (! Items.size()) return;
+
+ O << "\n" << TAI->getCommentString() << "Imported Variables - BEGIN" << "\n";
+ for (unsigned j = 0; j < Items.size(); j++) {
+ O << TAI->getExternDirective() << Mang->getValueName(Items[j]) << "\n";
+ }
+ O << TAI->getCommentString() << "Imported Variables - END" << "\n";
+}
+
+// Emit variables defined in this module that are available to other modules.
+void PIC16AsmPrinter::EmitDefinedVars (Module &M)
+{
+ std::vector<const GlobalVariable*> Items = PTAI->ExternalVarDefs->Items;
+ if (! Items.size()) return;
+
+ O << "\n" << TAI->getCommentString() << "Exported Variables - BEGIN" << "\n";
+ for (unsigned j = 0; j < Items.size(); j++) {
+ O << TAI->getGlobalDirective() << Mang->getValueName(Items[j]) << "\n";
+ }
+ O << TAI->getCommentString() << "Exported Variables - END" << "\n";
+}
+
+// Emit initialized data placed in ROM.
+void PIC16AsmPrinter::EmitRomData (Module &M)
+{
+
+ std::vector<const GlobalVariable*> Items = PTAI->ROSection->Items;
+ if (! Items.size()) return;
+
+  // Print the ROData section.
+ O << "\n";
+ SwitchToSection(PTAI->ROSection->S_);
+ for (unsigned j = 0; j < Items.size(); j++) {
+ O << Mang->getValueName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ int AddrSpace = Items[j]->getType()->getAddressSpace();
+ EmitGlobalConstant(C, AddrSpace);
+ }
+}
+
+bool PIC16AsmPrinter::doFinalization(Module &M) {
+ printLibcallDecls();
+ DbgInfo.EmitVarDebugInfo(M);
+ O << "\n\t" << ".EOF";
+ O << "\n\t" << "END\n";
+ bool Result = AsmPrinter::doFinalization(M);
+ return Result;
+}
+
+void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ std::string FuncName = Mang->getValueName(F);
+ const TargetData *TD = TM.getTargetData();
+ // Emit the data section name.
+ O << "\n";
+  std::string SectionName = PAN::getFrameSectionName(CurrentFnName);
+
+  const Section *fPDataSection = TAI->getNamedSection(SectionName.c_str(),
+                                                      SectionFlags::Writeable);
+ SwitchToSection(fPDataSection);
+
+ // Emit function frame label
+ O << PAN::getFrameLabel(CurrentFnName) << ":\n";
+
+ const Type *RetType = F->getReturnType();
+ unsigned RetSize = 0;
+ if (RetType->getTypeID() != Type::VoidTyID)
+ RetSize = TD->getTypeAllocSize(RetType);
+
+  // Emit function return value space.
+  // FIXME: Do not emit the Retval label when RetSize is zero. To do this
+  // we will need to avoid printing a global directive for the Retval label
+  // in emitExternAndGlobals.
+  if (RetSize > 0)
+ O << PAN::getRetvalLabel(CurrentFnName) << " RES " << RetSize << "\n";
+ else
+ O << PAN::getRetvalLabel(CurrentFnName) << ": \n";
+
+ // Emit variable to hold the space for function arguments
+ unsigned ArgSize = 0;
+ for (Function::const_arg_iterator argi = F->arg_begin(),
+ arge = F->arg_end(); argi != arge ; ++argi) {
+ const Type *Ty = argi->getType();
+ ArgSize += TD->getTypeAllocSize(Ty);
+ }
+
+ O << PAN::getArgsLabel(CurrentFnName) << " RES " << ArgSize << "\n";
+
+ // Emit temporary space
+ int TempSize = PTLI->GetTmpSize();
+ if (TempSize > 0 )
+ O << PAN::getTempdataLabel(CurrentFnName) << " RES " << TempSize <<"\n";
+}
+
+void PIC16AsmPrinter::EmitIData (Module &M) {
+
+ // Print all IDATA sections.
+ std::vector <PIC16Section *>IDATASections = PTAI->IDATASections;
+ for (unsigned i = 0; i < IDATASections.size(); i++) {
+ O << "\n";
+ SwitchToSection(IDATASections[i]->S_);
+ std::vector<const GlobalVariable*> Items = IDATASections[i]->Items;
+ for (unsigned j = 0; j < Items.size(); j++) {
+ std::string Name = Mang->getValueName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ int AddrSpace = Items[j]->getType()->getAddressSpace();
+ O << Name;
+ EmitGlobalConstant(C, AddrSpace);
+ }
+ }
+}
+
+void PIC16AsmPrinter::EmitUData (Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ // Print all BSS sections.
+ std::vector <PIC16Section *>BSSSections = PTAI->BSSSections;
+ for (unsigned i = 0; i < BSSSections.size(); i++) {
+ O << "\n";
+ SwitchToSection(BSSSections[i]->S_);
+ std::vector<const GlobalVariable*> Items = BSSSections[i]->Items;
+ for (unsigned j = 0; j < Items.size(); j++) {
+ std::string Name = Mang->getValueName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ const Type *Ty = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+
+      O << Name << " RES " << Size;
+ O << "\n";
+ }
+ }
+}
+
+void PIC16AsmPrinter::EmitAutos (std::string FunctName)
+{
+ // Section names for all globals are already set.
+
+ const TargetData *TD = TM.getTargetData();
+
+ // Now print Autos section for this function.
+ std::string SectionName = PAN::getAutosSectionName(FunctName);
+ std::vector <PIC16Section *>AutosSections = PTAI->AutosSections;
+ for (unsigned i = 0; i < AutosSections.size(); i++) {
+ O << "\n";
+ if (AutosSections[i]->S_->getName() == SectionName) {
+ SwitchToSection(AutosSections[i]->S_);
+ std::vector<const GlobalVariable*> Items = AutosSections[i]->Items;
+ for (unsigned j = 0; j < Items.size(); j++) {
+ std::string VarName = Mang->getValueName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ const Type *Ty = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ // Emit memory reserve directive.
+ O << VarName << " RES " << Size << "\n";
+ }
+ break;
+ }
+ }
+}
+
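For orientation, a hypothetical fragment of the output that EmitFunctionFrame and runOnMachineFunction above would produce for a function foo taking one i8 argument, returning an i8, with no temporaries; the section-switch directives and the exact mangled function label depend on the TargetAsmInfo and the Mangler, so they are omitted here.

    @foo.frame.:
    @foo.ret. RES 1
    @foo.args. RES 1

            retlw low(@foo.frame.)
            retlw high(@foo.frame.)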
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h
new file mode 100644
index 000000000000..2545dfd680a8
--- /dev/null
+++ b/lib/Target/PIC16/PIC16AsmPrinter.h
@@ -0,0 +1,70 @@
+//===-- PIC16AsmPrinter.h - PIC16 LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PIC16 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16ASMPRINTER_H
+#define PIC16ASMPRINTER_H
+
+#include "PIC16.h"
+#include "PIC16TargetMachine.h"
+#include "PIC16DebugInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "PIC16TargetAsmInfo.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <list>
+#include <string>
+
+namespace llvm {
+ struct VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter {
+ explicit PIC16AsmPrinter(raw_ostream &O, PIC16TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V), DbgInfo(O,T) {
+ PTLI = TM.getTargetLowering();
+ PTAI = static_cast<const PIC16TargetAsmInfo *> (T);
+ }
+ private :
+ virtual const char *getPassName() const {
+ return "PIC16 Assembly Printer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printCCOperand(const MachineInstr *MI, int opNum);
+ bool printInstruction(const MachineInstr *MI); // definition autogenerated.
+ bool printMachineInstruction(const MachineInstr *MI);
+ void EmitFunctionDecls (Module &M);
+ void EmitUndefinedVars (Module &M);
+ void EmitDefinedVars (Module &M);
+ void EmitIData (Module &M);
+ void EmitUData (Module &M);
+ void EmitAutos (std::string FunctName);
+ void EmitRomData (Module &M);
+ void EmitFunctionFrame(MachineFunction &MF);
+ void printLibcallDecls(void);
+ protected:
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ private:
+ PIC16TargetLowering *PTLI;
+ PIC16DbgInfo DbgInfo;
+ const PIC16TargetAsmInfo *PTAI;
+ std::list<const char *> LibcallDecls; // List of extern decls.
+ };
+} // end of namespace
+
+#endif
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
new file mode 100644
index 000000000000..4d43811f24a6
--- /dev/null
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -0,0 +1,270 @@
+//===-- PIC16DebugInfo.cpp - Implementation for PIC16 Debug Information ======//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the helper functions for representing debug information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16.h"
+#include "PIC16DebugInfo.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+PIC16DbgInfo::~PIC16DbgInfo() {
+ for(std::map<std::string, DISubprogram *>::iterator i = FunctNameMap.begin();
+ i!=FunctNameMap.end(); i++)
+ delete i->second;
+ FunctNameMap.clear();
+}
+
+void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TypeName) {
+ if (Ty.isBasicType(Ty.getTag())) {
+ std::string Name = "";
+ Ty.getName(Name);
+ unsigned short BaseTy = GetTypeDebugNumber(Name);
+ TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+ TypeNo = TypeNo | (0xffff & BaseTy);
+ }
+ else if (Ty.isDerivedType(Ty.getTag())) {
+ switch(Ty.getTag())
+ {
+ case dwarf::DW_TAG_pointer_type:
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ TypeNo = TypeNo | PIC16Dbg::DT_PTR;
+ break;
+ default:
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ }
+ DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom();
+ PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName);
+ }
+ else if (Ty.isCompositeType(Ty.getTag())) {
+ switch (Ty.getTag()) {
+ case dwarf::DW_TAG_array_type: {
+ DICompositeType CTy = DICompositeType(Ty.getGV());
+ DIArray Elements = CTy.getTypeArray();
+ unsigned short size = 1;
+ unsigned short Dimension[4]={0,0,0,0};
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ TypeNo = TypeNo | PIC16Dbg::DT_ARY;
+ DISubrange SubRange = DISubrange(Element.getGV());
+ Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
+ // Each dimension is represented by 2 bytes starting at byte 9.
+ Aux[8+i*2+0] = Dimension[i];
+ Aux[8+i*2+1] = Dimension[i] >> 8;
+ size = size * Dimension[i];
+ }
+ }
+ HasAux = true;
+      // In the auxiliary entry for an array, bytes 7 and 8 represent the
+      // array size.
+ Aux[6] = size;
+ Aux[7] = size >> 8;
+ DIType BaseType = CTy.getTypeDerivedFrom();
+ PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName);
+
+ break;
+ }
+    case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_structure_type: {
+ DICompositeType CTy = DICompositeType(Ty.getGV());
+ TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+ if (Ty.getTag() == dwarf::DW_TAG_structure_type)
+ TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
+ else
+ TypeNo = TypeNo | PIC16Dbg::T_UNION;
+ CTy.getName(TypeName);
+ unsigned size = CTy.getSizeInBits()/8;
+      // Bytes 7 and 8 represent the size.
+ HasAux = true;
+ Aux[6] = size;
+ Aux[7] = size >> 8;
+ break;
+ }
+ case dwarf::DW_TAG_enumeration_type: {
+ TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+ TypeNo = TypeNo | PIC16Dbg::T_ENUM;
+ break;
+ }
+ default:
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ }
+ }
+ else {
+ TypeNo = PIC16Dbg::T_NULL;
+ HasAux = false;
+ }
+ return;
+}
+
+
+unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) {
+ if (type == "char")
+ return PIC16Dbg::T_CHAR;
+ else if (type == "short")
+ return PIC16Dbg::T_SHORT;
+ else if (type == "int")
+ return PIC16Dbg::T_INT;
+ else if (type == "long")
+ return PIC16Dbg::T_LONG;
+ else if (type == "unsigned char")
+ return PIC16Dbg::T_UCHAR;
+ else if (type == "unsigned short")
+ return PIC16Dbg::T_USHORT;
+ else if (type == "unsigned int")
+ return PIC16Dbg::T_UINT;
+ else if (type == "unsigned long")
+ return PIC16Dbg::T_ULONG;
+ else
+ return 0;
+}
+
+short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) {
+ short ClassNo;
+ if (PAN::isLocalName(DIGV.getGlobal()->getName())) {
+    // Generating C_AUTO here fails due to an error in the linker. Change it
+    // once the linker is fixed.
+ ClassNo = PIC16Dbg::C_STAT;
+ }
+ else if (DIGV.isLocalToUnit())
+ ClassNo = PIC16Dbg::C_STAT;
+ else
+ ClassNo = PIC16Dbg::C_EXT;
+ return ClassNo;
+}
+
+void PIC16DbgInfo::PopulateFunctsDI(Module &M) {
+ GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.subprograms");
+ if (!Root)
+ return;
+ Constant *RootC = cast<Constant>(*Root->use_begin());
+
+  for (Value::use_iterator UI = RootC->use_begin(), UE = RootC->use_end();
+ UI != UE; ++UI)
+ for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+ UUI != UUE; ++UUI) {
+ GlobalVariable *GVSP = cast<GlobalVariable>(*UUI);
+ DISubprogram *SP = new DISubprogram(GVSP);
+ std::string Name;
+ SP->getLinkageName(Name);
+ FunctNameMap[Name] = SP;
+ }
+ return;
+}
+
+DISubprogram* PIC16DbgInfo::getFunctDI(std::string FunctName) {
+ return FunctNameMap[FunctName];
+}
+
+void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
+ std::string FunctName = F->getName();
+ DISubprogram *SP = getFunctDI(FunctName);
+ if (SP) {
+ std::string FunctBeginSym = ".bf." + FunctName;
+ std::string BlockBeginSym = ".bb." + FunctName;
+
+ int FunctBeginLine = SP->getLineNumber();
+ int BFAux[PIC16Dbg::AuxSize] = {0};
+ BFAux[4] = FunctBeginLine;
+ BFAux[5] = FunctBeginLine >> 8;
+ // Emit debug directives for beginning of function.
+ EmitSymbol(FunctBeginSym, PIC16Dbg::C_FCN);
+ EmitAuxEntry(FunctBeginSym, BFAux, PIC16Dbg::AuxSize);
+ EmitSymbol(BlockBeginSym, PIC16Dbg::C_BLOCK);
+ EmitAuxEntry(BlockBeginSym, BFAux, PIC16Dbg::AuxSize);
+ }
+}
+
+void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
+ std::string FunctName = F->getName();
+ DISubprogram *SP = getFunctDI(FunctName);
+ if (SP) {
+ std::string FunctEndSym = ".ef." + FunctName;
+ std::string BlockEndSym = ".eb." + FunctName;
+
+ // Emit debug directives for end of function.
+ EmitSymbol(BlockEndSym, PIC16Dbg::C_BLOCK);
+ int EFAux[PIC16Dbg::AuxSize] = {0};
+    // Bytes 5 and 6 hold the line number.
+ EFAux[4] = Line;
+ EFAux[5] = Line >> 8;
+ EmitAuxEntry(BlockEndSym, EFAux, PIC16Dbg::AuxSize);
+ EmitSymbol(FunctEndSym, PIC16Dbg::C_FCN);
+ EmitAuxEntry(FunctEndSym, EFAux, PIC16Dbg::AuxSize);
+ }
+}
+
+/// EmitAuxEntry - Emit Auxiliary debug information.
+///
+void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num) {
+ O << "\n\t.dim " << VarName << ", 1" ;
+ for (int i = 0; i<num; i++)
+ O << "," << Aux[i];
+}
+
+void PIC16DbgInfo::EmitSymbol(std::string Name, int Class) {
+ O << "\n\t" << ".def "<< Name << ", debug, class = " << Class;
+}
+
+void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
+ GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.global_variables");
+ if (!Root)
+ return;
+
+ Constant *RootC = cast<Constant>(*Root->use_begin());
+  for (Value::use_iterator UI = RootC->use_begin(), UE = RootC->use_end();
+ UI != UE; ++UI) {
+ for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
+ UUI != UUE; ++UUI) {
+ DIGlobalVariable DIGV(cast<GlobalVariable>(*UUI));
+ DIType Ty = DIGV.getType();
+ unsigned short TypeNo = 0;
+ bool HasAux = false;
+ int Aux[PIC16Dbg::AuxSize] = { 0 };
+ std::string TypeName = "";
+ std::string VarName = TAI->getGlobalPrefix()+DIGV.getGlobal()->getName();
+ PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TypeName);
+      // Emit debug info only if type information is available.
+ if (TypeNo != PIC16Dbg::T_NULL) {
+ O << "\n\t.type " << VarName << ", " << TypeNo;
+ short ClassNo = getClass(DIGV);
+ O << "\n\t.class " << VarName << ", " << ClassNo;
+ if (HasAux) {
+ if (TypeName != "") {
+          // Emit debug info for structure and union objects once the .dim
+          // directive supports structure/union tag names in the aux entry.
+ /* O << "\n\t.dim " << VarName << ", 1," << TypeName;
+ for (int i = 0; i<PIC16Dbg::AuxSize; i++)
+ O << "," << Aux[i];*/
+ }
+ else {
+ EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize);
+ }
+ }
+ }
+ }
+ }
+ O << "\n";
+}
+
+void PIC16DbgInfo::EmitFileDirective(Module &M) {
+ GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
+ if (CU) {
+ DICompileUnit DIUnit(CU);
+ std::string Dir, FN;
+ O << "\n\t.file\t\"" << DIUnit.getDirectory(Dir) <<"/"
+ << DIUnit.getFilename(FN) << "\"" ;
+ }
+}
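As a worked example (not part of the patch) of how PopulateDebugInfo above packs a type number, using the constants from PIC16DebugInfo.h (S_DERIVED = 3, S_BASIC = 5, DT_PTR = 1, DT_ARY = 3, T_INT = 4):

    // type_number_example.cpp - illustrative only, not part of the backend.
    #include <cassert>

    int main() {
      unsigned short TypeNo = 0;

      // int *p : a derived type (pointer) wrapped around a basic type (int).
      TypeNo = (TypeNo << 3) | 1;   // shift by S_DERIVED, or in DT_PTR
      TypeNo = (TypeNo << 5) | 4;   // shift by S_BASIC,   or in T_INT
      assert(TypeNo == 36);         // 0b0100100

      // int a[10] : a derived type (array) wrapped around int.
      TypeNo = 0;
      TypeNo = (TypeNo << 3) | 3;   // DT_ARY
      TypeNo = (TypeNo << 5) | 4;   // T_INT
      assert(TypeNo == 100);        // 0b1100100
      // For the array, PopulateDebugInfo also records the element count 10
      // in aux bytes 7-8 and the first dimension 10 in bytes 9-10.
      return 0;
    }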
diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h
new file mode 100644
index 000000000000..96b23da403c3
--- /dev/null
+++ b/lib/Target/PIC16/PIC16DebugInfo.h
@@ -0,0 +1,114 @@
+//===-- PIC16DebugInfo.h - Interfaces for PIC16 Debug Information ============//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the helper functions for representing debug information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16DBG_H
+#define PIC16DBG_H
+
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include <map>
+
+namespace llvm {
+ namespace PIC16Dbg {
+ enum VarType {
+ T_NULL,
+ T_VOID,
+ T_CHAR,
+ T_SHORT,
+ T_INT,
+ T_LONG,
+ T_FLOAT,
+ T_DOUBLE,
+ T_STRUCT,
+ T_UNION,
+ T_ENUM,
+ T_MOE,
+ T_UCHAR,
+ T_USHORT,
+ T_UINT,
+ T_ULONG
+ };
+ enum DerivedType {
+ DT_NONE,
+ DT_PTR,
+ DT_FCN,
+ DT_ARY
+ };
+ enum TypeSize {
+ S_BASIC = 5,
+ S_DERIVED = 3
+ };
+ enum DbgClass {
+ C_NULL,
+ C_AUTO,
+ C_EXT,
+ C_STAT,
+ C_REG,
+ C_EXTDEF,
+ C_LABEL,
+ C_ULABEL,
+ C_MOS,
+ C_ARG,
+ C_STRTAG,
+ C_MOU,
+ C_UNTAG,
+ C_TPDEF,
+ C_USTATIC,
+ C_ENTAG,
+ C_MOE,
+ C_REGPARM,
+ C_FIELD,
+ C_AUTOARG,
+ C_LASTENT,
+ C_BLOCK = 100,
+ C_FCN,
+ C_EOS,
+ C_FILE,
+ C_LINE,
+ C_ALIAS,
+ C_HIDDEN,
+ C_EOF,
+ C_LIST,
+ C_SECTION,
+ C_EFCN = 255
+ };
+ enum SymbolSize {
+      AuxSize = 20
+ };
+ }
+
+ class raw_ostream;
+
+ class PIC16DbgInfo {
+ std::map <std::string, DISubprogram *> FunctNameMap;
+ raw_ostream &O;
+ const TargetAsmInfo *TAI;
+ public:
+ PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) {}
+ ~PIC16DbgInfo();
+ void PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux,
+ int Aux[], std::string &TypeName);
+ unsigned GetTypeDebugNumber(std::string &type);
+ short getClass(DIGlobalVariable DIGV);
+ void PopulateFunctsDI(Module &M);
+ DISubprogram *getFunctDI(std::string FunctName);
+ void EmitFunctBeginDI(const Function *F);
+ void EmitFunctEndDI(const Function *F, unsigned Line);
+ void EmitAuxEntry(const std::string VarName, int Aux[], int num);
+ inline void EmitSymbol(std::string Name, int Class);
+ void EmitVarDebugInfo(Module &M);
+ void EmitFileDirective(Module &M);
+ };
+} // end namespace llvm;
+#endif
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..6c2b8ec9747a
--- /dev/null
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
@@ -0,0 +1,59 @@
+//===-- PIC16ISelDAGToDAG.cpp - A dag to dag inst selector for PIC16 ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the PIC16 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pic16-isel"
+
+#include "PIC16ISelDAGToDAG.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+/// createPIC16ISelDag - This pass converts a legalized DAG into a
+/// PIC16-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) {
+ return new PIC16DAGToDAGISel(TM);
+}
+
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void PIC16DAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+/// Select - Select instructions not customized! Used for
+/// expanded, promoted and normal instructions.
+SDNode* PIC16DAGToDAGISel::Select(SDValue N) {
+
+ // Select the default instruction.
+ SDNode *ResNode = SelectCode(N);
+
+ return ResNode;
+}
+
+
+// SelectDirectAddr - Match a direct address for DAG.
+// A direct address could be a globaladdress or externalsymbol.
+bool PIC16DAGToDAGISel::SelectDirectAddr(SDValue Op, SDValue N,
+ SDValue &Address) {
+ // Return true if TGA or ES.
+ if (N.getOpcode() == ISD::TargetGlobalAddress
+ || N.getOpcode() == ISD::TargetExternalSymbol) {
+ Address = N;
+ return true;
+ }
+
+ return false;
+}
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
new file mode 100644
index 000000000000..83abed3958a4
--- /dev/null
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
@@ -0,0 +1,60 @@
+//===-- PIC16ISelDAGToDAG.h - A dag to dag inst selector for PIC16 --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the PIC16 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pic16-isel"
+
+#include "PIC16.h"
+#include "PIC16ISelLowering.h"
+#include "PIC16RegisterInfo.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Intrinsics.h"
+using namespace llvm;
+
+namespace {
+
+class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to PIC16TargetMachine.
+ PIC16TargetMachine &TM;
+
+  /// PIC16Lowering - This object fully describes how to lower LLVM code to a
+ /// PIC16-specific SelectionDAG.
+ PIC16TargetLowering PIC16Lowering;
+
+public:
+ explicit PIC16DAGToDAGISel(PIC16TargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm), PIC16Lowering(*TM.getTargetLowering()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "PIC16 DAG->DAG Pattern Instruction Selection";
+ }
+
+ virtual void InstructionSelect();
+
+private:
+ // Include the pieces autogenerated from the target description.
+#include "PIC16GenDAGISel.inc"
+
+ SDNode *Select(SDValue N);
+
+ // Match direct address complex pattern.
+ bool SelectDirectAddr(SDValue Op, SDValue N, SDValue &Address);
+
+};
+
+}
+
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
new file mode 100644
index 000000000000..92fdcb2c0c15
--- /dev/null
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -0,0 +1,1756 @@
+//===-- PIC16ISelLowering.cpp - PIC16 DAG Lowering Implementation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PIC16 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pic16-lower"
+
+#include "PIC16ISelLowering.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Function.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+
+using namespace llvm;
+
+static const char *getIntrinsicName(unsigned opcode) {
+ std::string Basename;
+ switch(opcode) {
+ default: assert (0 && "do not know intrinsic name");
+ case PIC16ISD::SRA_I8: Basename = "sra.i8"; break;
+ case RTLIB::SRA_I16: Basename = "sra.i16"; break;
+ case RTLIB::SRA_I32: Basename = "sra.i32"; break;
+
+ case PIC16ISD::SLL_I8: Basename = "sll.i8"; break;
+ case RTLIB::SHL_I16: Basename = "sll.i16"; break;
+ case RTLIB::SHL_I32: Basename = "sll.i32"; break;
+
+ case PIC16ISD::SRL_I8: Basename = "srl.i8"; break;
+ case RTLIB::SRL_I16: Basename = "srl.i16"; break;
+ case RTLIB::SRL_I32: Basename = "srl.i32"; break;
+
+ case PIC16ISD::MUL_I8: Basename = "mul.i8"; break;
+ case RTLIB::MUL_I16: Basename = "mul.i16"; break;
+ case RTLIB::MUL_I32: Basename = "mul.i32"; break;
+ }
+
+ std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
+ std::string tagname = PAN::getTagName(PAN::LIBCALL);
+ std::string Fullname = prefix + tagname + Basename;
+
+ // The name has to live through program life.
+ char *tmp = new char[Fullname.size() + 1];
+ strcpy (tmp, Fullname.c_str());
+
+ return tmp;
+}
+
+// PIC16TargetLowering Constructor.
+PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
+ : TargetLowering(TM), TmpSize(0) {
+
+ Subtarget = &TM.getSubtarget<PIC16Subtarget>();
+
+ addRegisterClass(MVT::i8, PIC16::GPRRegisterClass);
+
+ setShiftAmountType(MVT::i8);
+ setShiftAmountFlavor(Extend);
+
+ // SRA library call names
+ setPIC16LibcallName(PIC16ISD::SRA_I8, getIntrinsicName(PIC16ISD::SRA_I8));
+ setLibcallName(RTLIB::SRA_I16, getIntrinsicName(RTLIB::SRA_I16));
+ setLibcallName(RTLIB::SRA_I32, getIntrinsicName(RTLIB::SRA_I32));
+
+ // SHL library call names
+ setPIC16LibcallName(PIC16ISD::SLL_I8, getIntrinsicName(PIC16ISD::SLL_I8));
+ setLibcallName(RTLIB::SHL_I16, getIntrinsicName(RTLIB::SHL_I16));
+ setLibcallName(RTLIB::SHL_I32, getIntrinsicName(RTLIB::SHL_I32));
+
+ // SRL library call names
+ setPIC16LibcallName(PIC16ISD::SRL_I8, getIntrinsicName(PIC16ISD::SRL_I8));
+ setLibcallName(RTLIB::SRL_I16, getIntrinsicName(RTLIB::SRL_I16));
+ setLibcallName(RTLIB::SRL_I32, getIntrinsicName(RTLIB::SRL_I32));
+
+ // MUL Library call names
+ setPIC16LibcallName(PIC16ISD::MUL_I8, getIntrinsicName(PIC16ISD::MUL_I8));
+ setLibcallName(RTLIB::MUL_I16, getIntrinsicName(RTLIB::MUL_I16));
+ setLibcallName(RTLIB::MUL_I32, getIntrinsicName(RTLIB::MUL_I32));
+
+ setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::i8, Legal);
+ setOperationAction(ISD::LOAD, MVT::i16, Custom);
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+
+ setOperationAction(ISD::STORE, MVT::i8, Legal);
+ setOperationAction(ISD::STORE, MVT::i16, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
+ setOperationAction(ISD::ADDE, MVT::i8, Custom);
+ setOperationAction(ISD::ADDC, MVT::i8, Custom);
+ setOperationAction(ISD::SUBE, MVT::i8, Custom);
+ setOperationAction(ISD::SUBC, MVT::i8, Custom);
+ setOperationAction(ISD::ADD, MVT::i8, Custom);
+ setOperationAction(ISD::ADD, MVT::i16, Custom);
+
+ setOperationAction(ISD::OR, MVT::i8, Custom);
+ setOperationAction(ISD::AND, MVT::i8, Custom);
+ setOperationAction(ISD::XOR, MVT::i8, Custom);
+
+ setOperationAction(ISD::FrameIndex, MVT::i16, Custom);
+ setOperationAction(ISD::CALL, MVT::i16, Custom);
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+
+ setOperationAction(ISD::MUL, MVT::i8, Custom);
+ setOperationAction(ISD::MUL, MVT::i16, Expand);
+ setOperationAction(ISD::MUL, MVT::i32, Expand);
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i16, Expand);
+ setOperationAction(ISD::SRA, MVT::i32, Expand);
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SHL, MVT::i16, Expand);
+ setOperationAction(ISD::SHL, MVT::i32, Expand);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i16, Expand);
+ setOperationAction(ISD::SRL, MVT::i32, Expand);
+
+ // PIC16 does not support shift parts
+ setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i8, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+
+ // PIC16 does not have a SETCC, expand it to SELECT_CC.
+ setOperationAction(ISD::SETCC, MVT::i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::i8, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i8, Custom);
+
+ //setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
+ setTruncStoreAction(MVT::i16, MVT::i8, Custom);
+
+ // Now deduce the information based on the above mentioned
+ // actions
+ computeRegisterProperties();
+}
+
+// getOutFlag - Extract the flag result if the Op has it.
+static SDValue getOutFlag(SDValue &Op) {
+ // Flag is the last value of the node.
+ SDValue Flag = Op.getValue(Op.getNode()->getNumValues() - 1);
+
+ assert (Flag.getValueType() == MVT::Flag
+ && "Node does not have an out Flag");
+
+ return Flag;
+}
+// Get the TmpOffset for FrameIndex
+unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size) {
+ std::map<unsigned, unsigned>::iterator
+ MapIt = FiTmpOffsetMap.find(FI);
+ if (MapIt != FiTmpOffsetMap.end())
+ return MapIt->second;
+
+ // This FI (FrameIndex) is not yet mapped, so map it
+ FiTmpOffsetMap[FI] = TmpSize;
+ TmpSize += size;
+ return FiTmpOffsetMap[FI];
+}
+
+// Extract the chain value from an SDValue node.
+// Keeping the chain-extraction code in one place makes it easier to
+// maintain if the node layout changes in the future.
+static SDValue getChain(SDValue &Op) {
+ SDValue Chain = Op.getValue(Op.getNode()->getNumValues() - 1);
+
+  // If the last value returned is a Flag, then the chain is the
+  // second-to-last value returned.
+ if (Chain.getValueType() == MVT::Flag)
+ Chain = Op.getValue(Op.getNode()->getNumValues() - 2);
+
+  // Not all nodes produce a chain. The following assert verifies that
+  // this node does return a chain.
+ assert (Chain.getValueType() == MVT::Other
+ && "Node does not have a chain");
+
+ return Chain;
+}
+
+/// PopulateResults - Helper function to LowerOperation.
+/// If a node wants to return multiple results after lowering,
+/// it stuffs them into an array of SDValue called Results.
+
+static void PopulateResults(SDValue N, SmallVectorImpl<SDValue>&Results) {
+ if (N.getOpcode() == ISD::MERGE_VALUES) {
+ int NumResults = N.getNumOperands();
+ for( int i = 0; i < NumResults; i++)
+ Results.push_back(N.getOperand(i));
+ }
+ else
+ Results.push_back(N);
+}
+
+MVT PIC16TargetLowering::getSetCCResultType(MVT ValType) const {
+ return MVT::i8;
+}
+
+/// The type legalizer framework can generate libcalls only when the
+/// operand/result types are illegal.
+/// PIC16 needs to generate libcalls even for legal types (i8) for some ops,
+/// for example an arithmetic right shift. These functions are used to lower
+/// such operations, which need a libcall even for legal types.
+
+void
+PIC16TargetLowering::setPIC16LibcallName(PIC16ISD::PIC16Libcall Call,
+ const char *Name) {
+ PIC16LibcallNames[Call] = Name;
+}
+
+const char *
+PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) {
+ return PIC16LibcallNames[Call];
+}
+
+SDValue
+PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call,
+ MVT RetVT, const SDValue *Ops,
+ unsigned NumOps, bool isSigned,
+ SelectionDAG &DAG, DebugLoc dl) {
+
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForMVT();
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(getPIC16LibcallName(Call), MVT::i8);
+
+ const Type *RetTy = RetVT.getTypeForMVT();
+ std::pair<SDValue,SDValue> CallInfo =
+ LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+ false, CallingConv::C, false, Callee, Args, DAG, dl);
+
+ return CallInfo.first;
+}
+
+const char *PIC16TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case PIC16ISD::Lo: return "PIC16ISD::Lo";
+ case PIC16ISD::Hi: return "PIC16ISD::Hi";
+ case PIC16ISD::MTLO: return "PIC16ISD::MTLO";
+ case PIC16ISD::MTHI: return "PIC16ISD::MTHI";
+ case PIC16ISD::MTPCLATH: return "PIC16ISD::MTPCLATH";
+ case PIC16ISD::PIC16Connect: return "PIC16ISD::PIC16Connect";
+ case PIC16ISD::Banksel: return "PIC16ISD::Banksel";
+ case PIC16ISD::PIC16Load: return "PIC16ISD::PIC16Load";
+ case PIC16ISD::PIC16LdArg: return "PIC16ISD::PIC16LdArg";
+ case PIC16ISD::PIC16LdWF: return "PIC16ISD::PIC16LdWF";
+ case PIC16ISD::PIC16Store: return "PIC16ISD::PIC16Store";
+ case PIC16ISD::PIC16StWF: return "PIC16ISD::PIC16StWF";
+ case PIC16ISD::BCF: return "PIC16ISD::BCF";
+ case PIC16ISD::LSLF: return "PIC16ISD::LSLF";
+ case PIC16ISD::LRLF: return "PIC16ISD::LRLF";
+ case PIC16ISD::RLF: return "PIC16ISD::RLF";
+ case PIC16ISD::RRF: return "PIC16ISD::RRF";
+ case PIC16ISD::CALL: return "PIC16ISD::CALL";
+ case PIC16ISD::CALLW: return "PIC16ISD::CALLW";
+ case PIC16ISD::SUBCC: return "PIC16ISD::SUBCC";
+ case PIC16ISD::SELECT_ICC: return "PIC16ISD::SELECT_ICC";
+ case PIC16ISD::BRCOND: return "PIC16ISD::BRCOND";
+ case PIC16ISD::Dummy: return "PIC16ISD::Dummy";
+ }
+}
+
+void PIC16TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
+
+ switch (N->getOpcode()) {
+ case ISD::GlobalAddress:
+ Results.push_back(ExpandGlobalAddress(N, DAG));
+ return;
+ case ISD::ExternalSymbol:
+ Results.push_back(ExpandExternalSymbol(N, DAG));
+ return;
+ case ISD::STORE:
+ Results.push_back(ExpandStore(N, DAG));
+ return;
+ case ISD::LOAD:
+ PopulateResults(ExpandLoad(N, DAG), Results);
+ return;
+ case ISD::ADD:
+ // Results.push_back(ExpandAdd(N, DAG));
+ return;
+ case ISD::FrameIndex:
+ Results.push_back(ExpandFrameIndex(N, DAG));
+ return;
+ default:
+ assert (0 && "not implemented");
+ return;
+ }
+}
+
+SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) {
+
+  // Currently handling FrameIndex of size MVT::i16 only.
+  // One example of this scenario is when the return value is written to
+  // FrameIndex #0.
+
+ if (N->getValueType(0) != MVT::i16)
+ return SDValue();
+
+ // Expand the FrameIndex into ExternalSymbol and a Constant node
+ // The constant will represent the frame index number
+ // Get the current function frame
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *Func = MF.getFunction();
+ const std::string Name = Func->getName();
+
+ FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(SDValue(N,0));
+ // FIXME there isn't really debug info here
+ DebugLoc dl = FR->getDebugLoc();
+ int Index = FR->getIndex();
+
+ // Expand FrameIndex like GlobalAddress and ExternalSymbol
+ // Also use Offset field for lo and hi parts. The default
+ // offset is zero.
+ SDValue Offset = DAG.getConstant(0, MVT::i8);
+ SDValue FI = DAG.getTargetFrameIndex(Index, MVT::i8);
+ SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, FI, Offset);
+ SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, FI, Offset);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0), Lo, Hi);
+}
+
+
+SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) {
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ SDValue Chain = St->getChain();
+ SDValue Src = St->getValue();
+ SDValue Ptr = St->getBasePtr();
+ MVT ValueType = Src.getValueType();
+ unsigned StoreOffset = 0;
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue PtrLo, PtrHi;
+ LegalizeAddress(Ptr, DAG, PtrLo, PtrHi, StoreOffset, dl);
+
+ if (ValueType == MVT::i8) {
+ return DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, Src,
+ PtrLo, PtrHi,
+ DAG.getConstant (0 + StoreOffset, MVT::i8));
+ }
+ else if (ValueType == MVT::i16) {
+ // Get the Lo and Hi parts from MERGE_VALUE or BUILD_PAIR.
+ SDValue SrcLo, SrcHi;
+ GetExpandedParts(Src, DAG, SrcLo, SrcHi);
+ SDValue ChainLo = Chain, ChainHi = Chain;
+ if (Chain.getOpcode() == ISD::TokenFactor) {
+ ChainLo = Chain.getOperand(0);
+ ChainHi = Chain.getOperand(1);
+ }
+ SDValue Store1 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other,
+ ChainLo,
+ SrcLo, PtrLo, PtrHi,
+ DAG.getConstant (0 + StoreOffset, MVT::i8));
+
+ SDValue Store2 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi,
+ SrcHi, PtrLo, PtrHi,
+ DAG.getConstant (1 + StoreOffset, MVT::i8));
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, getChain(Store1),
+ getChain(Store2));
+ }
+ else if (ValueType == MVT::i32) {
+ // Get the Lo and Hi parts from MERGE_VALUE or BUILD_PAIR.
+ SDValue SrcLo, SrcHi;
+ GetExpandedParts(Src, DAG, SrcLo, SrcHi);
+
+ // Get the expanded parts of each of SrcLo and SrcHi.
+ SDValue SrcLo1, SrcLo2, SrcHi1, SrcHi2;
+ GetExpandedParts(SrcLo, DAG, SrcLo1, SrcLo2);
+ GetExpandedParts(SrcHi, DAG, SrcHi1, SrcHi2);
+
+ SDValue ChainLo = Chain, ChainHi = Chain;
+ if (Chain.getOpcode() == ISD::TokenFactor) {
+ ChainLo = Chain.getOperand(0);
+ ChainHi = Chain.getOperand(1);
+ }
+ SDValue ChainLo1 = ChainLo, ChainLo2 = ChainLo, ChainHi1 = ChainHi,
+ ChainHi2 = ChainHi;
+ if (ChainLo.getOpcode() == ISD::TokenFactor) {
+ ChainLo1 = ChainLo.getOperand(0);
+ ChainLo2 = ChainLo.getOperand(1);
+ }
+ if (ChainHi.getOpcode() == ISD::TokenFactor) {
+ ChainHi1 = ChainHi.getOperand(0);
+ ChainHi2 = ChainHi.getOperand(1);
+ }
+ SDValue Store1 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other,
+ ChainLo1,
+ SrcLo1, PtrLo, PtrHi,
+ DAG.getConstant (0 + StoreOffset, MVT::i8));
+
+ SDValue Store2 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainLo2,
+ SrcLo2, PtrLo, PtrHi,
+ DAG.getConstant (1 + StoreOffset, MVT::i8));
+
+ SDValue Store3 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi1,
+ SrcHi1, PtrLo, PtrHi,
+ DAG.getConstant (2 + StoreOffset, MVT::i8));
+
+ SDValue Store4 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi2,
+ SrcHi2, PtrLo, PtrHi,
+ DAG.getConstant (3 + StoreOffset, MVT::i8));
+
+ SDValue RetLo = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ getChain(Store1), getChain(Store2));
+ SDValue RetHi = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ getChain(Store3), getChain(Store4));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, RetLo, RetHi);
+
+ }
+ else {
+ assert (0 && "value type not supported");
+ return SDValue();
+ }
+}
+
+SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG)
+{
+  ExternalSymbolSDNode *ES = cast<ExternalSymbolSDNode>(SDValue(N, 0));
+ // FIXME there isn't really debug info here
+ DebugLoc dl = ES->getDebugLoc();
+
+ SDValue TES = DAG.getTargetExternalSymbol(ES->getSymbol(), MVT::i8);
+ SDValue Offset = DAG.getConstant(0, MVT::i8);
+ SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, TES, Offset);
+ SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, TES, Offset);
+
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi);
+}
+
+// ExpandGlobalAddress -
+SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) {
+  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(SDValue(N, 0));
+ // FIXME there isn't really debug info here
+ DebugLoc dl = G->getDebugLoc();
+
+ SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i8,
+ G->getOffset());
+
+ SDValue Offset = DAG.getConstant(0, MVT::i8);
+ SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, TGA, Offset);
+ SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, TGA, Offset);
+
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi);
+}
+
+bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) {
+ assert (Op.getNode() != NULL && "Can't operate on NULL SDNode!!");
+
+ if (Op.getOpcode() == ISD::BUILD_PAIR) {
+ if (Op.getOperand(0).getOpcode() == PIC16ISD::Lo)
+ return true;
+ }
+ return false;
+}
+
+// Return true if DirectAddress is in ROM_SPACE
+bool PIC16TargetLowering::isRomAddress(const SDValue &Op) {
+
+  // A ROM address is a GlobalAddress in ROM_SPACE.
+  // If the Op is not a GlobalAddress, return false without checking
+  // anything further.
+ if (!isDirectAddress(Op))
+ return false;
+
+  // It's a GlobalAddress: Op is BUILD_PAIR((PIC16Lo TGA), (PIC16Hi TGA)),
+  // so extract the TGA from the first operand's operand.
+ SDValue TGA = Op.getOperand(0).getOperand(0);
+  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(TGA);
+
+ if (GSDN->getAddressSpace() == PIC16ISD::ROM_SPACE)
+ return true;
+
+ // Any other address space return it false
+ return false;
+}
+
+
+// GetExpandedParts - This function works along the same lines as
+// GetExpandedInteger in the type legalizer. It returns the expanded
+// parts of Op in Lo and Hi.
+
+void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG,
+ SDValue &Lo, SDValue &Hi) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+ MVT NewVT = getTypeToTransformTo(N->getValueType(0));
+
+ // Extract the lo component.
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op,
+ DAG.getConstant(0, MVT::i8));
+
+  // Extract the hi component.
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op,
+ DAG.getConstant(1, MVT::i8));
+}
+
+// Legalize FrameIndex into ExternalSymbol and offset.
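+// For illustration (hypothetical sizes): if FI 0 and FI 1 occupy 1 and 2
+// bytes of the reserved frame, FI 2 maps to the <fname>.frame symbol with
+// offset 3 (assuming ReservedFrameCount > 2); indices beyond
+// ReservedFrameCount map into <fname>.tmp via GetTmpOffsetForFI instead.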
+void
+PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG,
+ SDValue &ES, int &Offset) {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *Func = MF.getFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const std::string Name = Func->getName();
+
+  FrameIndexSDNode *FR = cast<FrameIndexSDNode>(Op);
+
+  // FrameIndices are not stack offsets; they represent a request for space
+  // on the stack, and the space requested may be more than one byte.
+  // Therefore, to calculate the stack offset that a FrameIndex corresponds
+  // to, we need to traverse all the FrameIndices that come earlier in the
+  // list and add up their requested sizes.
+ unsigned FIndex = FR->getIndex();
+ const char *tmpName;
+ if (FIndex < ReservedFrameCount) {
+ tmpName = createESName(PAN::getFrameLabel(Name));
+ ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
+ Offset = 0;
+ for (unsigned i=0; i<FIndex ; ++i) {
+ Offset += MFI->getObjectSize(i);
+ }
+ } else {
+    // This FrameIndex was created for some temporary storage.
+ tmpName = createESName(PAN::getTempdataLabel(Name));
+ ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
+ Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex));
+ }
+
+ return;
+}
+
+// This function legalizes PIC16 addresses. If the pointer is
+//  -- a direct-address variable
+//     --> a Banksel for that variable will be created.
+//  -- a ROM variable
+//     --> it will be treated as an indirect address.
+//  -- an indirect address
+//     --> the address will be loaded into FSR.
+//  -- an ADD with a constant operand
+//     --> the constant operand of the ADD will be returned as the Offset,
+//         and the non-constant operand will be treated as the pointer.
+// Returns the Lo and Hi parts of the address, and the offset (in the ADD case).
+
+void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG,
+ SDValue &Lo, SDValue &Hi,
+ unsigned &Offset, DebugLoc dl) {
+
+ // Offset, by default, should be 0
+ Offset = 0;
+
+ // If the pointer is ADD with constant,
+ // return the constant value as the offset
+ if (Ptr.getOpcode() == ISD::ADD) {
+ SDValue OperLeft = Ptr.getOperand(0);
+ SDValue OperRight = Ptr.getOperand(1);
+ if (OperLeft.getOpcode() == ISD::Constant) {
+ Offset = dyn_cast<ConstantSDNode>(OperLeft)->getZExtValue();
+ Ptr = OperRight;
+ } else if (OperRight.getOpcode() == ISD::Constant) {
+ Offset = dyn_cast<ConstantSDNode>(OperRight)->getZExtValue();
+ Ptr = OperLeft;
+ }
+ }
+
+  // If the pointer has type i8 and is an external symbol,
+  // then treat it as a direct address.
+  // One example of such a case is storing to and loading
+  // from the function frame during a call.
+ if (Ptr.getValueType() == MVT::i8) {
+ switch (Ptr.getOpcode()) {
+ case ISD::TargetExternalSymbol:
+ Lo = Ptr;
+ Hi = DAG.getConstant(1, MVT::i8);
+ return;
+ }
+ }
+
+ // Expansion of FrameIndex has Lo/Hi parts
+ if (isDirectAddress(Ptr)) {
+ SDValue TFI = Ptr.getOperand(0).getOperand(0);
+ if (TFI.getOpcode() == ISD::TargetFrameIndex) {
+ int FrameOffset;
+ LegalizeFrameIndex(TFI, DAG, Lo, FrameOffset);
+ Hi = DAG.getConstant(1, MVT::i8);
+ Offset += FrameOffset;
+ return;
+ }
+ }
+
+ if (isDirectAddress(Ptr) && !isRomAddress(Ptr)) {
+ // Direct addressing case for RAM variables. The Hi part is constant
+ // and the Lo part is the TGA itself.
+ Lo = Ptr.getOperand(0).getOperand(0);
+
+    // For direct addresses Hi is a constant. A value of 1 signifies that a
+    // banksel needs to be generated for it; a value of 0 signifies that no
+    // banksel is needed. Mark it as 1 now and optimize later.
+ Hi = DAG.getConstant(1, MVT::i8);
+ return;
+ }
+
+ // Indirect addresses. Get the hi and lo parts of ptr.
+ GetExpandedParts(Ptr, DAG, Lo, Hi);
+
+ // Put the hi and lo parts into FSR.
+ Lo = DAG.getNode(PIC16ISD::MTLO, dl, MVT::i8, Lo);
+ Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Hi);
+
+ return;
+}
+
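+// ExpandLoad - Expand a (possibly extending) load into byte-wide PIC16Load
+// nodes. As a sketch (not from the original comments): an i16 sextload of an
+// i8 value becomes one PIC16Load of the memory byte plus that byte shifted
+// right arithmetically by 7 as the upper byte, combined with BUILD_PAIR.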
+SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) {
+  LoadSDNode *LD = cast<LoadSDNode>(SDValue(N, 0));
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ DebugLoc dl = LD->getDebugLoc();
+
+ SDValue Load, Offset;
+ SDVTList Tys;
+ MVT VT, NewVT;
+ SDValue PtrLo, PtrHi;
+ unsigned LoadOffset;
+
+ // Legalize direct/indirect addresses. This will give the lo and hi parts
+ // of the address and the offset.
+ LegalizeAddress(Ptr, DAG, PtrLo, PtrHi, LoadOffset, dl);
+
+ // Load from the pointer (direct address or FSR)
+ VT = N->getValueType(0);
+ unsigned NumLoads = VT.getSizeInBits() / 8;
+ std::vector<SDValue> PICLoads;
+ unsigned iter;
+ MVT MemVT = LD->getMemoryVT();
+ if(ISD::isNON_EXTLoad(N)) {
+ for (iter=0; iter<NumLoads ; ++iter) {
+ // Add the pointer offset if any
+ Offset = DAG.getConstant(iter + LoadOffset, MVT::i8);
+ Tys = DAG.getVTList(MVT::i8, MVT::Other);
+ Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain, PtrLo, PtrHi,
+ Offset);
+ PICLoads.push_back(Load);
+ }
+ } else {
+    // If it is an extending load, use PIC16Load for the memory bytes
+    // and handle the extended bytes according to the type of
+    // extension, i.e. sign-extending or zero-extending load.
+
+
+    // For extending loads, MemVT (computed above) is the memory value type,
+    // i.e. the type without any extension.
+ unsigned MemBytes = MemVT.getSizeInBits() / 8;
+ unsigned ExtdBytes = VT.getSizeInBits() / 8;
+ Offset = DAG.getConstant(LoadOffset, MVT::i8);
+
+ Tys = DAG.getVTList(MVT::i8, MVT::Other);
+ // For MemBytes generate PIC16Load with proper offset
+ for (iter=0; iter<MemBytes; ++iter) {
+ // Add the pointer offset if any
+ Offset = DAG.getConstant(iter + LoadOffset, MVT::i8);
+ Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain, PtrLo, PtrHi,
+ Offset);
+ PICLoads.push_back(Load);
+ }
+
+ // For SignExtendedLoad
+ if (ISD::isSEXTLoad(N)) {
+      // For all ExtdBytes use the arithmetically right-shifted value of the
+      // highest memory byte.
+ SDValue SRA = DAG.getNode(ISD::SRA, dl, MVT::i8, Load,
+ DAG.getConstant(7, MVT::i8));
+ for (iter=MemBytes; iter<ExtdBytes; ++iter) {
+ PICLoads.push_back(SRA);
+ }
+ } else if (ISD::isZEXTLoad(N)) {
+ // ZeroExtendedLoad -- For all ExtdBytes use constant 0
+ SDValue ConstZero = DAG.getConstant(0, MVT::i8);
+ for (iter=MemBytes; iter<ExtdBytes; ++iter) {
+ PICLoads.push_back(ConstZero);
+ }
+ }
+ }
+ SDValue BP;
+
+ if (VT == MVT::i8) {
+ // Operand of Load is illegal -- Load itself is legal
+ return PICLoads[0];
+ }
+ else if (VT == MVT::i16) {
+ BP = DAG.getNode(ISD::BUILD_PAIR, dl, VT, PICLoads[0], PICLoads[1]);
+ if (MemVT == MVT::i8)
+ Chain = getChain(PICLoads[0]);
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ getChain(PICLoads[0]), getChain(PICLoads[1]));
+ } else if (VT == MVT::i32) {
+ SDValue BPs[2];
+ BPs[0] = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16,
+ PICLoads[0], PICLoads[1]);
+ BPs[1] = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16,
+ PICLoads[2], PICLoads[3]);
+ BP = DAG.getNode(ISD::BUILD_PAIR, dl, VT, BPs[0], BPs[1]);
+ if (MemVT == MVT::i8)
+ Chain = getChain(PICLoads[0]);
+ else if (MemVT == MVT::i16)
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ getChain(PICLoads[0]), getChain(PICLoads[1]));
+ else {
+ SDValue Chains[2];
+ Chains[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ getChain(PICLoads[0]), getChain(PICLoads[1]));
+ Chains[1] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ getChain(PICLoads[2]), getChain(PICLoads[3]));
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Chains[0], Chains[1]);
+ }
+ }
+ Tys = DAG.getVTList(VT, MVT::Other);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, BP, Chain);
+}
+
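+// LowerShift - i8 shifts are lowered to PIC16 library calls
+// (SRA_I8/SLL_I8/SRL_I8). For example (illustrative, not from the original
+// comments): (srl i8 %x, %n) becomes a MakePIC16Libcall of SRL_I8 with
+// operands (%x, %n).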
+SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
+ // We should have handled larger operands in type legalizer itself.
+ assert (Op.getValueType() == MVT::i8 && "illegal shift to lower");
+
+ SDNode *N = Op.getNode();
+ SDValue Value = N->getOperand(0);
+ SDValue Amt = N->getOperand(1);
+ PIC16ISD::PIC16Libcall CallCode;
+ switch (N->getOpcode()) {
+ case ISD::SRA:
+ CallCode = PIC16ISD::SRA_I8;
+ break;
+ case ISD::SHL:
+ CallCode = PIC16ISD::SLL_I8;
+ break;
+ case ISD::SRL:
+ CallCode = PIC16ISD::SRL_I8;
+ break;
+ default:
+ assert ( 0 && "This shift is not implemented yet.");
+ return SDValue();
+ }
+ SmallVector<SDValue, 2> Ops(2);
+ Ops[0] = Value;
+ Ops[1] = Amt;
+ SDValue Call = MakePIC16Libcall(CallCode, N->getValueType(0), &Ops[0], 2,
+ true, DAG, N->getDebugLoc());
+ return Call;
+}
+
+void
+PIC16TargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
+ SDValue Op = SDValue(N, 0);
+ SDValue Res;
+ unsigned i;
+ switch (Op.getOpcode()) {
+ case ISD::FORMAL_ARGUMENTS:
+ Res = LowerFORMAL_ARGUMENTS(Op, DAG); break;
+ case ISD::LOAD:
+ Res = ExpandLoad(Op.getNode(), DAG); break;
+ case ISD::CALL:
+ Res = LowerCALL(Op, DAG); break;
+ default: {
+ // All other operations are handled in LowerOperation.
+ Res = LowerOperation(Op, DAG);
+ if (Res.getNode())
+ Results.push_back(Res);
+
+ return;
+ }
+ }
+
+ N = Res.getNode();
+ unsigned NumValues = N->getNumValues();
+ for (i = 0; i < NumValues ; i++) {
+ Results.push_back(SDValue(N, i));
+ }
+}
+
+SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ case ISD::FORMAL_ARGUMENTS:
+ return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ return LowerADD(Op, DAG);
+ case ISD::SUB:
+ case ISD::SUBC:
+ case ISD::SUBE:
+ return LowerSUB(Op, DAG);
+ case ISD::LOAD:
+ return ExpandLoad(Op.getNode(), DAG);
+ case ISD::STORE:
+ return ExpandStore(Op.getNode(), DAG);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return LowerShift(Op, DAG);
+ case ISD::OR:
+ case ISD::AND:
+ case ISD::XOR:
+ return LowerBinOp(Op, DAG);
+ case ISD::CALL:
+ return LowerCALL(Op, DAG);
+ case ISD::RET:
+ return LowerRET(Op, DAG);
+ case ISD::BR_CC:
+ return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC:
+ return LowerSELECT_CC(Op, DAG);
+ }
+ return SDValue();
+}
+
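+// ConvertToMemOperand - PIC16 cannot have both operands of a binary operation
+// in W, so one of them is placed in memory. A sketch of what the code below
+// does (this description is added here, not taken from the original
+// comments): it creates a one-byte stack object, stores Op to the function's
+// <fname>.tmp area at that object's temp offset, and reloads it; the returned
+// PIC16Load is then usable as a memory operand.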
+SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op,
+ SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert (Op.getValueType() == MVT::i8
+ && "illegal value type to store on stack.");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *Func = MF.getFunction();
+ const std::string FuncName = Func->getName();
+
+
+ // Put the value on stack.
+ // Get a stack slot index and convert to es.
+ int FI = MF.getFrameInfo()->CreateStackObject(1, 1);
+ const char *tmpName = createESName(PAN::getTempdataLabel(FuncName));
+ SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
+
+ // Store the value to ES.
+ SDValue Store = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other,
+ DAG.getEntryNode(),
+ Op, ES,
+ DAG.getConstant (1, MVT::i8), // Banksel.
+ DAG.getConstant (GetTmpOffsetForFI(FI, 1),
+ MVT::i8));
+
+ // Load the value from ES.
+ SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other);
+ SDValue Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Store,
+ ES, DAG.getConstant (1, MVT::i8),
+ DAG.getConstant (GetTmpOffsetForFI(FI, 1),
+ MVT::i8));
+
+ return Load.getValue(0);
+}
+
+SDValue PIC16TargetLowering::
+LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag,
+ SDValue DataAddr_Lo, SDValue DataAddr_Hi,
+ SelectionDAG &DAG) {
+  CallSDNode *TheCall = cast<CallSDNode>(Op);
+ unsigned NumOps = TheCall->getNumArgs();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ // If call has no arguments then do nothing and return.
+ if (NumOps == 0)
+ return Chain;
+
+ std::vector<SDValue> Ops;
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Arg, StoreRet;
+
+ // For PIC16 ABI the arguments come after the return value.
+ unsigned RetVals = TheCall->getNumRetVals();
+ for (unsigned i = 0, ArgOffset = RetVals; i < NumOps; i++) {
+ // Get the arguments
+ Arg = TheCall->getArg(i);
+
+ Ops.clear();
+ Ops.push_back(Chain);
+ Ops.push_back(Arg);
+ Ops.push_back(DataAddr_Lo);
+ Ops.push_back(DataAddr_Hi);
+ Ops.push_back(DAG.getConstant(ArgOffset, MVT::i8));
+ Ops.push_back(InFlag);
+
+ StoreRet = DAG.getNode (PIC16ISD::PIC16StWF, dl, Tys, &Ops[0], Ops.size());
+
+ Chain = getChain(StoreRet);
+ InFlag = getOutFlag(StoreRet);
+ ArgOffset++;
+ }
+ return Chain;
+}
+
+SDValue PIC16TargetLowering::
+LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue ArgLabel,
+ SDValue InFlag, SelectionDAG &DAG) {
+  CallSDNode *TheCall = cast<CallSDNode>(Op);
+ unsigned NumOps = TheCall->getNumArgs();
+ DebugLoc dl = TheCall->getDebugLoc();
+ std::string Name;
+ SDValue Arg, StoreAt;
+ MVT ArgVT;
+ unsigned Size=0;
+ unsigned ArgCount=0;
+
+ // If call has no arguments then do nothing and return.
+ if (NumOps == 0)
+ return Chain;
+
+  // FIXME: This portion of code currently assumes that only
+  // primitive types are passed as arguments.
+
+ // Legalize the address before use
+ SDValue PtrLo, PtrHi;
+ unsigned AddressOffset;
+ int StoreOffset = 0;
+ LegalizeAddress(ArgLabel, DAG, PtrLo, PtrHi, AddressOffset, dl);
+ SDValue StoreRet;
+
+ std::vector<SDValue> Ops;
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ for (unsigned i=ArgCount, Offset = 0; i<NumOps; i++) {
+ // Get the argument
+ Arg = TheCall->getArg(i);
+ StoreOffset = (Offset + AddressOffset);
+
+ // Store the argument on frame
+
+ Ops.clear();
+ Ops.push_back(Chain);
+ Ops.push_back(Arg);
+ Ops.push_back(PtrLo);
+ Ops.push_back(PtrHi);
+ Ops.push_back(DAG.getConstant(StoreOffset, MVT::i8));
+ Ops.push_back(InFlag);
+
+ StoreRet = DAG.getNode (PIC16ISD::PIC16StWF, dl, Tys, &Ops[0], Ops.size());
+
+ Chain = getChain(StoreRet);
+ InFlag = getOutFlag(StoreRet);
+
+ // Update the frame offset to be used for next argument
+ ArgVT = Arg.getValueType();
+ Size = ArgVT.getSizeInBits();
+ Size = Size/8; // Calculate size in bytes
+ Offset += Size; // Increase the frame offset
+ }
+ return Chain;
+}
+
+SDValue PIC16TargetLowering::
+LowerIndirectCallReturn (SDValue Op, SDValue Chain, SDValue InFlag,
+ SDValue DataAddr_Lo, SDValue DataAddr_Hi,
+ SelectionDAG &DAG) {
+  CallSDNode *TheCall = cast<CallSDNode>(Op);
+ DebugLoc dl = TheCall->getDebugLoc();
+ unsigned RetVals = TheCall->getNumRetVals();
+
+ // If call does not have anything to return
+ // then do nothing and go back.
+ if (RetVals == 0)
+ return Chain;
+
+ // Call has something to return
+ std::vector<SDValue> ResultVals;
+ SDValue LoadRet;
+
+ SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
+ for(unsigned i=0;i<RetVals;i++) {
+ LoadRet = DAG.getNode(PIC16ISD::PIC16LdWF, dl, Tys, Chain, DataAddr_Lo,
+ DataAddr_Hi, DAG.getConstant(i, MVT::i8),
+ InFlag);
+ InFlag = getOutFlag(LoadRet);
+ Chain = getChain(LoadRet);
+ ResultVals.push_back(LoadRet);
+ }
+ ResultVals.push_back(Chain);
+ SDValue Res = DAG.getMergeValues(&ResultVals[0], ResultVals.size(), dl);
+ return Res;
+}
+
+SDValue PIC16TargetLowering::
+LowerDirectCallReturn(SDValue Op, SDValue Chain, SDValue RetLabel,
+ SDValue InFlag, SelectionDAG &DAG) {
+  CallSDNode *TheCall = cast<CallSDNode>(Op);
+ DebugLoc dl = TheCall->getDebugLoc();
+ // Currently handling primitive types only. They will come in
+ // i8 parts
+ unsigned RetVals = TheCall->getNumRetVals();
+
+ std::vector<SDValue> ResultVals;
+
+ // Return immediately if the return type is void
+ if (RetVals == 0)
+ return Chain;
+
+ // Call has something to return
+
+ // Legalize the address before use
+ SDValue LdLo, LdHi;
+ unsigned LdOffset;
+ LegalizeAddress(RetLabel, DAG, LdLo, LdHi, LdOffset, dl);
+
+ SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
+ SDValue LoadRet;
+
+ for(unsigned i=0, Offset=0;i<RetVals;i++) {
+
+ LoadRet = DAG.getNode(PIC16ISD::PIC16LdWF, dl, Tys, Chain, LdLo, LdHi,
+ DAG.getConstant(LdOffset + Offset, MVT::i8),
+ InFlag);
+
+ InFlag = getOutFlag(LoadRet);
+
+ Chain = getChain(LoadRet);
+ Offset++;
+ ResultVals.push_back(LoadRet);
+ }
+
+ // To return use MERGE_VALUES
+ ResultVals.push_back(Chain);
+ SDValue Res = DAG.getMergeValues(&ResultVals[0], ResultVals.size(), dl);
+ return Res;
+}
+
+SDValue PIC16TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getNumOperands() == 1) // return void
+ return Op;
+
+ // return should have odd number of operands
+ if ((Op.getNumOperands() % 2) == 0 ) {
+ assert(0 && "Do not know how to return this many arguments!");
+ abort();
+ }
+
+ // Number of values to return
+ unsigned NumRet = (Op.getNumOperands() / 2);
+
+  // The function always returns its value on the stack, with the offset
+  // starting from 0.
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *F = MF.getFunction();
+ std::string FuncName = F->getName();
+
+ const char *tmpName = createESName(PAN::getFrameLabel(FuncName));
+ SDVTList VTs = DAG.getVTList (MVT::i8, MVT::Other);
+ SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
+ SDValue BS = DAG.getConstant(1, MVT::i8);
+ SDValue RetVal;
+ for(unsigned i=0;i<NumRet; ++i) {
+ RetVal = Op.getNode()->getOperand(2*i + 1);
+ Chain = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, RetVal,
+ ES, BS,
+ DAG.getConstant (i, MVT::i8));
+
+ }
+ return DAG.getNode(ISD::RET, dl, MVT::Other, Chain);
+}
+
+// A CALL node may have some operands that are not legal for PIC16. Generate a
+// new CALL node with all operands legal.
+// Currently only the Callee operand of the CALL node is non-legal. This
+// function legalizes the Callee operand and uses all other operands as they
+// are to generate the new CALL node.
+
+SDValue PIC16TargetLowering::LegalizeCALL(SDValue Op, SelectionDAG &DAG) {
+  CallSDNode *TheCall = cast<CallSDNode>(Op);
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ DebugLoc dl = TheCall->getDebugLoc();
+ unsigned i =0;
+
+ assert(Callee.getValueType() == MVT::i16 &&
+ "Don't know how to legalize this call node!!!");
+ assert(Callee.getOpcode() == ISD::BUILD_PAIR &&
+ "Don't know how to legalize this call node!!!");
+
+ if (isDirectAddress(Callee)) {
+ // Come here for direct calls
+ Callee = Callee.getOperand(0).getOperand(0);
+ } else {
+ // Come here for indirect calls
+ SDValue Lo, Hi;
+ // Indirect addresses. Get the hi and lo parts of ptr.
+ GetExpandedParts(Callee, DAG, Lo, Hi);
+ // Connect Lo and Hi parts of the callee with the PIC16Connect
+ Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Lo, Hi);
+ }
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add the call arguments and their flags
+ unsigned NumArgs = TheCall->getNumArgs();
+ for(i=0;i<NumArgs;i++) {
+ Ops.push_back(TheCall->getArg(i));
+ Ops.push_back(TheCall->getArgFlagsVal(i));
+ }
+ std::vector<MVT> NodeTys;
+ unsigned NumRets = TheCall->getNumRetVals();
+ for(i=0;i<NumRets;i++)
+ NodeTys.push_back(TheCall->getRetValType(i));
+
+ // Return a Chain as well
+ NodeTys.push_back(MVT::Other);
+
+ SDVTList VTs = DAG.getVTList(&NodeTys[0], NodeTys.size());
+ // Generate new call with all the operands legal
+ return DAG.getCall(TheCall->getCallingConv(), dl,
+ TheCall->isVarArg(), TheCall->isTailCall(),
+ TheCall->isInreg(), VTs, &Ops[0], Ops.size());
+}
+
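+// GetDataAddress - For an indirect call, the data address of the callee's
+// frame has to be computed at run time. Reading the sequence below (this
+// description is an interpretation, not from the original comments): two
+// CALLW sequences are issued to the code addresses Callee-2 and Callee-1;
+// each returns one byte in W, and those bytes are moved into FSR via
+// MTLO/MTHI to form DataAddr_Lo and DataAddr_Hi.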
+void PIC16TargetLowering::
+GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain,
+ SDValue &DataAddr_Lo, SDValue &DataAddr_Hi,
+ SelectionDAG &DAG) {
+  assert (Callee.getOpcode() == PIC16ISD::PIC16Connect
+          && "Don't know what to do with such a callee!!");
+ SDValue ZeroOperand = DAG.getConstant(0, MVT::i8);
+ SDValue SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand);
+ Chain = getChain(SeqStart);
+ SDValue OperFlag = getOutFlag(SeqStart); // To manage the data dependency
+
+ // Get the Lo and Hi part of code address
+ SDValue Lo = Callee.getOperand(0);
+ SDValue Hi = Callee.getOperand(1);
+
+ SDValue Data_Lo, Data_Hi;
+ SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
+ // Subtract 2 from Address to get the Lower part of DataAddress.
+ SDVTList VTList = DAG.getVTList(MVT::i8, MVT::Flag);
+ Data_Lo = DAG.getNode(ISD::SUBC, dl, VTList, Lo,
+ DAG.getConstant(2, MVT::i8));
+ SDValue Ops[3] = { Hi, DAG.getConstant(0, MVT::i8), Data_Lo.getValue(1)};
+ Data_Hi = DAG.getNode(ISD::SUBE, dl, VTList, Ops, 3);
+ SDValue PCLATH = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, Data_Hi);
+ Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Data_Lo, PCLATH);
+ SDValue Call = DAG.getNode(PIC16ISD::CALLW, dl, Tys, Chain, Callee,
+ OperFlag);
+ Chain = getChain(Call);
+ OperFlag = getOutFlag(Call);
+ SDValue SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
+ OperFlag);
+ Chain = getChain(SeqEnd);
+ OperFlag = getOutFlag(SeqEnd);
+
+ // Low part of Data Address
+ DataAddr_Lo = DAG.getNode(PIC16ISD::MTLO, dl, MVT::i8, Call, OperFlag);
+
+ // Make the second call.
+ SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand);
+ Chain = getChain(SeqStart);
+ OperFlag = getOutFlag(SeqStart); // To manage the data dependency
+
+ // Subtract 1 from Address to get high part of data address.
+ Data_Lo = DAG.getNode(ISD::SUBC, dl, VTList, Lo,
+ DAG.getConstant(1, MVT::i8));
+ SDValue HiOps[3] = { Hi, DAG.getConstant(0, MVT::i8), Data_Lo.getValue(1)};
+ Data_Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ PCLATH = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, Data_Hi);
+
+ // Use new Lo to make another CALLW
+ Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Data_Lo, PCLATH);
+ Call = DAG.getNode(PIC16ISD::CALLW, dl, Tys, Chain, Callee, OperFlag);
+ Chain = getChain(Call);
+ OperFlag = getOutFlag(Call);
+ SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
+ OperFlag);
+ Chain = getChain(SeqEnd);
+ OperFlag = getOutFlag(SeqEnd);
+ // Hi part of Data Address
+ DataAddr_Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Call, OperFlag);
+}
+
+
+SDValue PIC16TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+  CallSDNode *TheCall = cast<CallSDNode>(Op);
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ DebugLoc dl = TheCall->getDebugLoc();
+ if (Callee.getValueType() == MVT::i16 &&
+ Callee.getOpcode() == ISD::BUILD_PAIR) {
+ // Control should come here only from TypeLegalizer for lowering
+
+ // Legalize the non-legal arguments of call and return the
+ // new call with legal arguments.
+ return LegalizeCALL(Op, DAG);
+ }
+ // Control should come here from Legalize DAG.
+ // Here all the operands of CALL node should be legal.
+
+ // If this is an indirect call then to pass the arguments
+ // and read the return value back, we need the data address
+ // of the function being called.
+ // To get the data address two more calls need to be made.
+
+ // The flag to track if this is a direct or indirect call.
+ bool IsDirectCall = true;
+ unsigned RetVals = TheCall->getNumRetVals();
+ unsigned NumArgs = TheCall->getNumArgs();
+
+ SDValue DataAddr_Lo, DataAddr_Hi;
+ if (Callee.getOpcode() == PIC16ISD::PIC16Connect) {
+ IsDirectCall = false; // This is indirect call
+ // Read DataAddress only if we have to pass arguments or
+ // read return value.
+ if ((RetVals > 0) || (NumArgs > 0))
+ GetDataAddress(dl, Callee, Chain, DataAddr_Lo, DataAddr_Hi, DAG);
+ }
+
+ SDValue ZeroOperand = DAG.getConstant(0, MVT::i8);
+
+ // Start the call sequence.
+  // Carrying the constant 0 along the CALLSEQ_START
+  // because there is nothing else to carry.
+ SDValue SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand);
+ Chain = getChain(SeqStart);
+ SDValue OperFlag = getOutFlag(SeqStart); // To manage the data dependency
+ std::string Name;
+
+ // For any direct call - callee will be GlobalAddressNode or
+ // ExternalSymbol
+ SDValue ArgLabel, RetLabel;
+ if (IsDirectCall) {
+ // Considering the GlobalAddressNode case here.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ GlobalValue *GV = G->getGlobal();
+ Callee = DAG.getTargetGlobalAddress(GV, MVT::i8);
+ Name = G->getGlobal()->getName();
+ } else {// Considering the ExternalSymbol case here
+      ExternalSymbolSDNode *ES = cast<ExternalSymbolSDNode>(Callee);
+ Callee = DAG.getTargetExternalSymbol(ES->getSymbol(), MVT::i8);
+ Name = ES->getSymbol();
+ }
+
+ // Label for argument passing
+ const char *argFrame = createESName(PAN::getArgsLabel(Name));
+ ArgLabel = DAG.getTargetExternalSymbol(argFrame, MVT::i8);
+
+ // Label for reading return value
+ const char *retName = createESName(PAN::getRetvalLabel(Name));
+ RetLabel = DAG.getTargetExternalSymbol(retName, MVT::i8);
+ } else {
+ // if indirect call
+ SDValue CodeAddr_Lo = Callee.getOperand(0);
+ SDValue CodeAddr_Hi = Callee.getOperand(1);
+
+ /*CodeAddr_Lo = DAG.getNode(ISD::ADD, dl, MVT::i8, CodeAddr_Lo,
+ DAG.getConstant(2, MVT::i8));*/
+
+ // move Hi part in PCLATH
+ CodeAddr_Hi = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, CodeAddr_Hi);
+ Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, CodeAddr_Lo,
+ CodeAddr_Hi);
+ }
+
+  // Pass the arguments to the function before making the call.
+ SDValue CallArgs;
+ if (IsDirectCall) {
+ CallArgs = LowerDirectCallArguments(Op, Chain, ArgLabel, OperFlag, DAG);
+ Chain = getChain(CallArgs);
+ OperFlag = getOutFlag(CallArgs);
+ } else {
+ CallArgs = LowerIndirectCallArguments(Op, Chain, OperFlag, DataAddr_Lo,
+ DataAddr_Hi, DAG);
+ Chain = getChain(CallArgs);
+ OperFlag = getOutFlag(CallArgs);
+ }
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue PICCall = DAG.getNode(PIC16ISD::CALL, dl, Tys, Chain, Callee,
+ OperFlag);
+ Chain = getChain(PICCall);
+ OperFlag = getOutFlag(PICCall);
+
+
+  // Carrying the constant 0 along the CALLSEQ_END
+  // because there is nothing else to carry.
+ SDValue SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
+ OperFlag);
+ Chain = getChain(SeqEnd);
+ OperFlag = getOutFlag(SeqEnd);
+
+ // Lower the return value reading after the call.
+ if (IsDirectCall)
+ return LowerDirectCallReturn(Op, Chain, RetLabel, OperFlag, DAG);
+ else
+ return LowerIndirectCallReturn(Op, Chain, OperFlag, DataAddr_Lo,
+ DataAddr_Hi, DAG);
+}
+
+bool PIC16TargetLowering::isDirectLoad(const SDValue Op) {
+ if (Op.getOpcode() == PIC16ISD::PIC16Load)
+ if (Op.getOperand(1).getOpcode() == ISD::TargetGlobalAddress
+ || Op.getOperand(1).getOpcode() == ISD::TargetExternalSymbol)
+ return true;
+ return false;
+}
+
+// NeedToConvertToMemOp - Returns true if one of the operands of the
+// operation 'Op' needs to be put into memory, and returns the number of that
+// operand in 'MemOp'. Remember, PIC16 has no instruction that can operate on
+// two registers; most instructions take one register and one memory operand
+// (addwf) or one register and a constant (addlw).
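+// Illustrative example (not from the original comments): for (add %x, %y)
+// where neither operand is a constant or a one-use direct load, MemOp stays
+// at the caller's default of 1, so the second operand is spilled via
+// ConvertToMemOperand and the add can later match a reg/memory form such as
+// addwf.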
+bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp) {
+  // If one of the operands is a constant, return false.
+ if (Op.getOperand(0).getOpcode() == ISD::Constant ||
+ Op.getOperand(1).getOpcode() == ISD::Constant)
+ return false;
+
+ // Return false if one of the operands is already a direct
+ // load and that operand has only one use.
+ if (isDirectLoad(Op.getOperand(0))) {
+ if (Op.getOperand(0).hasOneUse())
+ return false;
+ else
+ MemOp = 0;
+ }
+ if (isDirectLoad(Op.getOperand(1))) {
+ if (Op.getOperand(1).hasOneUse())
+ return false;
+ else
+ MemOp = 1;
+ }
+ return true;
+}
+
+// LowerBinOp - Lower a commutative binary operation that does not
+// affect the carry status flag.
+SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // We should have handled larger operands in type legalizer itself.
+ assert (Op.getValueType() == MVT::i8 && "illegal Op to lower");
+
+ unsigned MemOp = 1;
+ if (NeedToConvertToMemOp(Op, MemOp)) {
+ // Put one value on stack.
+ SDValue NewVal = ConvertToMemOperand (Op.getOperand(MemOp), DAG, dl);
+
+ return DAG.getNode(Op.getOpcode(), dl, MVT::i8, Op.getOperand(MemOp ^ 1),
+ NewVal);
+ }
+ else {
+ return Op;
+ }
+}
+
+// LowerADD - Lower all types of ADD operations, including the ones
+// that affect carry.
+SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) {
+ // We should have handled larger operands in type legalizer itself.
+ assert (Op.getValueType() == MVT::i8 && "illegal add to lower");
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned MemOp = 1;
+ if (NeedToConvertToMemOp(Op, MemOp)) {
+ // Put one value on stack.
+ SDValue NewVal = ConvertToMemOperand (Op.getOperand(MemOp), DAG, dl);
+
+ // ADDC and ADDE produce two results.
+ SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag);
+
+ // ADDE has three operands, the last one is the carry bit.
+ if (Op.getOpcode() == ISD::ADDE)
+ return DAG.getNode(Op.getOpcode(), dl, Tys, Op.getOperand(MemOp ^ 1),
+ NewVal, Op.getOperand(2));
+ // ADDC has two operands.
+ else if (Op.getOpcode() == ISD::ADDC)
+ return DAG.getNode(Op.getOpcode(), dl, Tys, Op.getOperand(MemOp ^ 1),
+ NewVal);
+ // ADD it is. It produces only one result.
+ else
+ return DAG.getNode(Op.getOpcode(), dl, MVT::i8, Op.getOperand(MemOp ^ 1),
+ NewVal);
+ }
+ else
+ return Op;
+}
+
+SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // We should have handled larger operands in type legalizer itself.
+ assert (Op.getValueType() == MVT::i8 && "illegal sub to lower");
+
+ // Nothing to do if the first operand is already a direct load and it has
+ // only one use.
+ if (isDirectLoad(Op.getOperand(0)) && Op.getOperand(0).hasOneUse())
+ return Op;
+
+ // Put first operand on stack.
+ SDValue NewVal = ConvertToMemOperand (Op.getOperand(0), DAG, dl);
+
+ SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag);
+ if (Op.getOpcode() == ISD::SUBE)
+ return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1),
+ Op.getOperand(2));
+ else
+ return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1));
+}
+
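+// InitReservedFrameCount - Reserve one frame index per formal argument, plus
+// one more for the return value when the function is non-void. For example
+// (illustrative): a non-void function with two arguments reserves three frame
+// indices, which LegalizeFrameIndex maps into the <fname>.frame symbol.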
+void PIC16TargetLowering::InitReservedFrameCount(const Function *F) {
+ unsigned NumArgs = F->arg_size();
+
+ bool isVoidFunc = (F->getReturnType()->getTypeID() == Type::VoidTyID);
+
+ if (isVoidFunc)
+ ReservedFrameCount = NumArgs;
+ else
+ ReservedFrameCount = NumArgs + 1;
+}
+
+// LowerFORMAL_ARGUMENTS - Argument values are loaded from
+// <fname>.args + offset. All arguments are already broken into legalized
+// types, so the offset just runs from 0 to NumArgVals - 1.
+
+SDValue PIC16TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
+ SelectionDAG &DAG) {
+ SmallVector<SDValue, 8> ArgValues;
+ unsigned NumArgVals = Op.getNode()->getNumValues() - 1;
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Chain = Op.getOperand(0); // Formal arguments' chain
+
+
+ // Get the callee's name to create the <fname>.args label to pass args.
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *F = MF.getFunction();
+ std::string FuncName = F->getName();
+
+ // Reset the map of FI and TmpOffset
+ ResetTmpOffsetMap();
+  // Initialize ReservedFrameCount.
+ InitReservedFrameCount(F);
+
+ // Create the <fname>.args external symbol.
+ const char *tmpName = createESName(PAN::getArgsLabel(FuncName));
+ SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
+
+ // Load arg values from the label + offset.
+ SDVTList VTs = DAG.getVTList (MVT::i8, MVT::Other);
+ SDValue BS = DAG.getConstant(1, MVT::i8);
+ for (unsigned i = 0; i < NumArgVals ; ++i) {
+ SDValue Offset = DAG.getConstant(i, MVT::i8);
+ SDValue PICLoad = DAG.getNode(PIC16ISD::PIC16LdArg, dl, VTs, Chain, ES, BS,
+ Offset);
+ Chain = getChain(PICLoad);
+ ArgValues.push_back(PICLoad);
+ }
+
+  // Return a MERGE_VALUES node.
+ ArgValues.push_back(Op.getOperand(0));
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+// Perform DAGCombine of PIC16Load.
+// If the loaded value (result 0) has no uses, the load is needed only for its
+// chain; forward the chain users to the incoming chain so the node can be
+// removed.
+SDValue PIC16TargetLowering::
+PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Chain = N->getOperand(0);
+ if (N->hasNUsesOfValue(0, 0)) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), Chain);
+ }
+ return SDValue();
+}
+
+// For all functions with arguments, some STORE nodes are generated that store
+// the arguments to the frame index. However, on PIC16 arguments are passed on
+// the stack only, so these STORE nodes are redundant. They will be removed in
+// PerformStoreCombine.
+//
+// Currently this function does nothing; it will be updated to remove the
+// unwanted store operations.
+SDValue PIC16TargetLowering::
+PerformStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+ return SDValue(N, 0);
+ /*
+ // Storing an undef value is of no use, so remove it
+ if (isStoringUndef(N, Chain, DAG)) {
+ return Chain; // remove the store and return the chain
+ }
+ //else everything is ok.
+ return SDValue(N, 0);
+ */
+}
+
+SDValue PIC16TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ switch (N->getOpcode()) {
+ case ISD::STORE:
+ return PerformStoreCombine(N, DCI);
+ case PIC16ISD::PIC16Load:
+ return PerformPIC16LoadCombine(N, DCI);
+ }
+ return SDValue();
+}
+
+static PIC16CC::CondCodes IntCCToPIC16CC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code!");
+ case ISD::SETNE: return PIC16CC::NE;
+ case ISD::SETEQ: return PIC16CC::EQ;
+ case ISD::SETGT: return PIC16CC::GT;
+ case ISD::SETGE: return PIC16CC::GE;
+ case ISD::SETLT: return PIC16CC::LT;
+ case ISD::SETLE: return PIC16CC::LE;
+ case ISD::SETULT: return PIC16CC::ULT;
+  case ISD::SETULE: return PIC16CC::ULE;
+  case ISD::SETUGE: return PIC16CC::UGE;
+ case ISD::SETUGT: return PIC16CC::UGT;
+ }
+}
+
+// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so,
+// set LHS/RHS to the LHS/RHS of the setcc and SPCC to the condition.
+static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
+ ISD::CondCode CC, unsigned &SPCC) {
+ if (isa<ConstantSDNode>(RHS) &&
+ cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+ CC == ISD::SETNE &&
+ (LHS.getOpcode() == PIC16ISD::SELECT_ICC &&
+ LHS.getOperand(3).getOpcode() == PIC16ISD::SUBCC) &&
+ isa<ConstantSDNode>(LHS.getOperand(0)) &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) {
+ SDValue CMPCC = LHS.getOperand(3);
+ SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
+ LHS = CMPCC.getOperand(0);
+ RHS = CMPCC.getOperand(1);
+ }
+}
+
+// Returns appropriate CMP insn and corresponding condition code in PIC16CC
+SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS,
+ unsigned CC, SDValue &PIC16CC,
+ SelectionDAG &DAG, DebugLoc dl) {
+ PIC16CC::CondCodes CondCode = (PIC16CC::CondCodes) CC;
+
+  // PIC16 subtract computes (literal - W), so swap the operands and the
+  // condition if needed; e.g. a < 12 can be rewritten as 12 > a.
+ if (RHS.getOpcode() == ISD::Constant) {
+
+ SDValue Tmp = LHS;
+ LHS = RHS;
+ RHS = Tmp;
+
+ switch (CondCode) {
+ default: break;
+ case PIC16CC::LT:
+ CondCode = PIC16CC::GT;
+ break;
+ case PIC16CC::GT:
+ CondCode = PIC16CC::LT;
+ break;
+ case PIC16CC::ULT:
+ CondCode = PIC16CC::UGT;
+ break;
+ case PIC16CC::UGT:
+ CondCode = PIC16CC::ULT;
+ break;
+ case PIC16CC::GE:
+ CondCode = PIC16CC::LE;
+ break;
+ case PIC16CC::LE:
+ CondCode = PIC16CC::GE;
+ break;
+ case PIC16CC::ULE:
+ CondCode = PIC16CC::UGE;
+ break;
+ case PIC16CC::UGE:
+ CondCode = PIC16CC::ULE;
+ break;
+ }
+ }
+
+ PIC16CC = DAG.getConstant(CondCode, MVT::i8);
+
+  // For signed comparisons, bias both operands by 0x80 (flip the sign bit)
+  // so that an unsigned-style subtract/compare gives the right result.
+ SDValue Mask = DAG.getConstant(128, MVT::i8);
+ if (isSignedComparison(CondCode)) {
+ LHS = DAG.getNode (ISD::XOR, dl, MVT::i8, LHS, Mask);
+ RHS = DAG.getNode (ISD::XOR, dl, MVT::i8, RHS, Mask);
+ }
+
+ SDVTList VTs = DAG.getVTList (MVT::i8, MVT::Flag);
+ // We can use a subtract operation to set the condition codes. But
+ // we need to put one operand in memory if required.
+ // Nothing to do if the first operand is already a valid type (direct load
+ // for subwf and literal for sublw) and it is used by this operation only.
+ if ((LHS.getOpcode() == ISD::Constant || isDirectLoad(LHS))
+ && LHS.hasOneUse())
+ return DAG.getNode(PIC16ISD::SUBCC, dl, VTs, LHS, RHS);
+
+ // else convert the first operand to mem.
+ LHS = ConvertToMemOperand (LHS, DAG, dl);
+ return DAG.getNode(PIC16ISD::SUBCC, dl, VTs, LHS, RHS);
+}
+
+
+SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ unsigned ORIGCC = ~0;
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If this is a select_cc of a "setcc", and if the setcc got lowered into
+  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ // i.e.
+ // A setcc: lhs, rhs, cc is expanded by llvm to
+ // select_cc: result of setcc, 0, 1, 0, setne
+ // We can think of it as:
+ // select_cc: lhs, rhs, 1, 0, cc
+ LookThroughSetCC(LHS, RHS, CC, ORIGCC);
+ if (ORIGCC == ~0U) ORIGCC = IntCCToPIC16CC (CC);
+
+ SDValue PIC16CC;
+ SDValue Cmp = getPIC16Cmp(LHS, RHS, ORIGCC, PIC16CC, DAG, dl);
+
+ return DAG.getNode (PIC16ISD::SELECT_ICC, dl, TrueVal.getValueType(), TrueVal,
+ FalseVal, PIC16CC, Cmp.getValue(1));
+}
+
+MachineBasicBlock *
+PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ unsigned CC = (PIC16CC::CondCodes)MI->getOperand(3).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // [f]bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ BuildMI(BB, dl, TII.get(PIC16::pic16brcond)).addMBB(sinkMBB).addImm(CC);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ sinkMBB->transferSuccessors(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII.get(PIC16::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
+
+
+SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2); // LHS of the condition.
+ SDValue RHS = Op.getOperand(3); // RHS of the condition.
+ SDValue Dest = Op.getOperand(4); // BB to jump to
+ unsigned ORIGCC = ~0;
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If this is a br_cc of a "setcc", and if the setcc got lowered into
+  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, ORIGCC);
+ if (ORIGCC == ~0U) ORIGCC = IntCCToPIC16CC (CC);
+
+ // Get the Compare insn and condition code.
+ SDValue PIC16CC;
+ SDValue Cmp = getPIC16Cmp(LHS, RHS, ORIGCC, PIC16CC, DAG, dl);
+
+ return DAG.getNode(PIC16ISD::BRCOND, dl, MVT::Other, Chain, Dest, PIC16CC,
+ Cmp.getValue(1));
+}
+
diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h
new file mode 100644
index 000000000000..ca9650d6b19e
--- /dev/null
+++ b/lib/Target/PIC16/PIC16ISelLowering.h
@@ -0,0 +1,227 @@
+//===-- PIC16ISelLowering.h - PIC16 DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PIC16 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16ISELLOWERING_H
+#define PIC16ISELLOWERING_H
+
+#include "PIC16.h"
+#include "PIC16Subtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include <map>
+
+namespace llvm {
+ namespace PIC16ISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ Lo, // Low 8-bits of GlobalAddress.
+ Hi, // High 8-bits of GlobalAddress.
+ PIC16Load,
+      PIC16LdArg,   // A replica of PIC16Load, but used to load function
+                    // arguments; it exists to facilitate some store-removal
+                    // optimizations.
+
+ PIC16LdWF,
+ PIC16Store,
+ PIC16StWF,
+ Banksel,
+ MTLO, // Move to low part of FSR
+ MTHI, // Move to high part of FSR
+      MTPCLATH, // Move to PCLATH
+ PIC16Connect, // General connector for PIC16 nodes
+ BCF,
+ LSLF, // PIC16 Logical shift left
+ LRLF, // PIC16 Logical shift right
+ RLF, // Rotate left through carry
+ RRF, // Rotate right through carry
+ CALL, // PIC16 Call instruction
+ CALLW, // PIC16 CALLW instruction
+ SUBCC, // Compare for equality or inequality.
+      SELECT_ICC, // Pseudo to be caught in scheduler and expanded to brcond.
+ BRCOND, // Conditional branch.
+ Dummy
+ };
+
+ // Keep track of different address spaces.
+ enum AddressSpace {
+ RAM_SPACE = 0, // RAM address space
+ ROM_SPACE = 1 // ROM address space number is 1
+ };
+ enum PIC16Libcall {
+ MUL_I8 = RTLIB::UNKNOWN_LIBCALL + 1,
+ SRA_I8,
+ SLL_I8,
+ SRL_I8,
+ PIC16UnknownCall
+ };
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+ class PIC16TargetLowering : public TargetLowering {
+ public:
+ explicit PIC16TargetLowering(PIC16TargetMachine &TM);
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual MVT getSetCCResultType(MVT ValType) const;
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerADD(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSUB(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+ // Call returns
+ SDValue
+ LowerDirectCallReturn(SDValue Op, SDValue Chain, SDValue FrameAddress,
+ SDValue InFlag, SelectionDAG &DAG);
+ SDValue
+ LowerIndirectCallReturn(SDValue Op, SDValue Chain, SDValue InFlag,
+ SDValue DataAddr_Lo, SDValue DataAddr_Hi,
+ SelectionDAG &DAG);
+
+ // Call arguments
+ SDValue
+ LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue FrameAddress,
+ SDValue InFlag, SelectionDAG &DAG);
+
+ SDValue
+ LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag,
+ SDValue DataAddr_Lo, SDValue DataAddr_Hi,
+ SelectionDAG &DAG);
+
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned OrigCC, SDValue &CC,
+ SelectionDAG &DAG, DebugLoc dl);
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+ virtual void ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG);
+ virtual void LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG);
+
+ SDValue ExpandStore(SDNode *N, SelectionDAG &DAG);
+ SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG);
+ SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG);
+ SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG);
+ SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG);
+
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue PerformStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+    // This function returns the Tmp offset for a FrameIndex. If a TmpOffset
+    // already exists for the FI, it returns that; otherwise it creates a new
+    // offset and returns it.
+ unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size);
+ void ResetTmpOffsetMap() { FiTmpOffsetMap.clear(); SetTmpSize(0); }
+ void InitReservedFrameCount(const Function *F);
+
+ // Return the size of Tmp variable
+ unsigned GetTmpSize() { return TmpSize; }
+ void SetTmpSize(unsigned Size) { TmpSize = Size; }
+
+ private:
+ // If the Node is a BUILD_PAIR representing a direct Address,
+ // then this function will return true.
+ bool isDirectAddress(const SDValue &Op);
+
+ // If the Node is a DirectAddress in ROM_SPACE then this
+ // function will return true
+ bool isRomAddress(const SDValue &Op);
+
+ // Extract the Lo and Hi component of Op.
+ void GetExpandedParts(SDValue Op, SelectionDAG &DAG, SDValue &Lo,
+ SDValue &Hi);
+
+
+    // A load/store pointer can be a direct or an indirect address. On PIC16,
+    // direct addresses need a Banksel and indirect addresses need to be
+    // loaded into FSR first. Handle the address-specific cases here.
+    void LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, SDValue &Lo,
+                         SDValue &Hi, unsigned &Offset, DebugLoc dl);
+
+ // FrameIndex should be broken down into ExternalSymbol and FrameOffset.
+ void LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, SDValue &ES,
+ int &Offset);
+
+
+    // A CALL node should have only legal operands. Legalize all non-legal
+    // operands of the CALL node and then return the new call with all
+    // operands legal.
+ SDValue LegalizeCALL(SDValue Op, SelectionDAG &DAG);
+
+    // For indirect calls the data address of the callee frame needs to be
+    // extracted. This function fills the arguments DataAddr_Lo and
+    // DataAddr_Hi with the address of the callee frame.
+ void GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain,
+ SDValue &DataAddr_Lo, SDValue &DataAddr_Hi,
+ SelectionDAG &DAG);
+
+ // We can not have both operands of a binary operation in W.
+ // This function is used to put one operand on stack and generate a load.
+ SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
+
+ // This function checks if we need to put an operand of an operation on
+ // stack and generate a load or not.
+ bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp);
+
+ /// Subtarget - Keep a pointer to the PIC16Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const PIC16Subtarget *Subtarget;
+
+
+ // Extending the LIB Call framework of LLVM
+ // to hold the names of PIC16Libcalls.
+ const char *PIC16LibcallNames[PIC16ISD::PIC16UnknownCall];
+
+ // To set and retrieve the lib call names.
+ void setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, const char *Name);
+ const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call);
+
+ // Make PIC16 Libcall.
+ SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, MVT RetVT,
+ const SDValue *Ops, unsigned NumOps, bool isSigned,
+ SelectionDAG &DAG, DebugLoc dl);
+
+ // Check if operation has a direct load operand.
+ inline bool isDirectLoad(const SDValue Op);
+
+ private:
+    // The frame indexes generated for spill/reload are stack based.
+    // This map maintains zero-based indexes for these FIs.
+ std::map<unsigned, unsigned> FiTmpOffsetMap;
+ unsigned TmpSize;
+
+    // The number of frame indices reserved for return value and argument
+    // passing. These FrameIndices will be expanded to the foo.frame external
+    // symbol, and all others will be expanded to the foo.tmp external symbol.
+ unsigned ReservedFrameCount;
+ };
+} // namespace llvm
+
+#endif // PIC16ISELLOWERING_H
diff --git a/lib/Target/PIC16/PIC16InstrFormats.td b/lib/Target/PIC16/PIC16InstrFormats.td
new file mode 100644
index 000000000000..e213ea847fc8
--- /dev/null
+++ b/lib/Target/PIC16/PIC16InstrFormats.td
@@ -0,0 +1,117 @@
+//===- PIC16InstrFormats.td - PIC16 Instruction Formats-------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe PIC16 instructions format
+//
+// All the possible PIC16 fields are:
+//
+// opcode - operation code.
+// f - 7-bit register file address.
+// d - 1-bit direction specifier
+// k - 8/11 bit literals
+//  b       - 3-bit bit number specifier
+//
+//===----------------------------------------------------------------------===//
+
+// Generic PIC16 Format
+// PIC16 instructions are 14 bits wide.
+
+// FIXME: Add Cooper Specific Formats if any.
+
+class PIC16Inst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<14> Inst;
+
+ let Namespace = "PIC16";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Byte Oriented instruction class in PIC16 : <|opcode|d|f|>
+// opcode = 6 bits.
+// d = direction = 1 bit.
+// f = file register address = 7 bits.
+//===----------------------------------------------------------------------===//
+
+class ByteFormat<bits<6> opcode, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ :PIC16Inst<outs, ins, asmstr, pattern> {
+ bits<1> d;
+ bits<7> f;
+
+ let Inst{13-8} = opcode;
+
+ let Inst{7} = d;
+ let Inst{6-0} = f;
+}
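+
+// Example use (hypothetical; shown only for illustration -- the real
+// instruction definitions live in PIC16InstrInfo.td, and the operand classes
+// used here are made up):
+//
+//   def ADDWF : ByteFormat<0b000111, (outs GPR:$dst),
+//                          (ins GPR:$src, i8mem:$ptr),
+//                          "addwf $ptr, $dst", []>;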
+
+//===----------------------------------------------------------------------===//
+// Bit Oriented instruction class in PIC16 : <|opcode|b|f|>
+// opcode = 4 bits.
+// b = bit specifier = 3 bits.
+// f = file register address = 7 bits.
+//===----------------------------------------------------------------------===//
+
+class BitFormat<bits<4> opcode, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : PIC16Inst<outs, ins, asmstr, pattern> {
+ bits<3> b;
+ bits<7> f;
+
+ let Inst{13-10} = opcode;
+
+ let Inst{9-7} = b;
+ let Inst{6-0} = f;
+}
+
+//===----------------------------------------------------------------------===//
+// Literal Format instruction class in PIC16 : <|opcode|k|>
+// opcode = 6 bits
+// k = literal = 8 bits
+//===----------------------------------------------------------------------===//
+
+class LiteralFormat<bits<6> opcode, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : PIC16Inst<outs, ins, asmstr, pattern> {
+ bits<8> k;
+
+ let Inst{13-8} = opcode;
+
+ let Inst{7-0} = k;
+}
+
+//===----------------------------------------------------------------------===//
+// Control Format instruction class in PIC16 : <|opcode|k|>
+// opcode = 3 bits.
+// k = jump address = 11 bits.
+//===----------------------------------------------------------------------===//
+
+class ControlFormat<bits<3> opcode, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : PIC16Inst<outs, ins, asmstr, pattern> {
+ bits<11> k;
+
+ let Inst{13-11} = opcode;
+
+ let Inst{10-0} = k;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instruction class in PIC16
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : PIC16Inst<outs, ins, asmstr, pattern> {
+ let Inst{13-6} = 0;
+}
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
new file mode 100644
index 000000000000..2a769e8ad16e
--- /dev/null
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -0,0 +1,186 @@
+//===- PIC16InstrInfo.cpp - PIC16 Instruction Information -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PIC16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16.h"
+#include "PIC16InstrInfo.h"
+#include "PIC16TargetMachine.h"
+#include "PIC16GenInstrInfo.inc"
+#include "llvm/Function.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include <cstdio>
+
+
+using namespace llvm;
+
+// FIXME: Add the subtarget support on this constructor.
+PIC16InstrInfo::PIC16InstrInfo(PIC16TargetMachine &tm)
+ : TargetInstrInfoImpl(PIC16Insts, array_lengthof(PIC16Insts)),
+ TM(tm),
+ RegInfo(*this, *TM.getSubtargetImpl()) {}
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot being stored to.
+/// If not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned PIC16InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == PIC16::movwf
+ && MI->getOperand(0).isReg()
+ && MI->getOperand(1).isSymbol()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ return 0;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the dest reg along with the FrameIndex of the stack slot.
+/// If not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned PIC16InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == PIC16::movf
+ && MI->getOperand(0).isReg()
+ && MI->getOperand(1).isSymbol()) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ return 0;
+}
+
+
+void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC) const {
+ PIC16TargetLowering *PTLI = TM.getTargetLowering();
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ const Function *Func = MBB.getParent()->getFunction();
+ const std::string FuncName = Func->getName();
+
+ const char *tmpName = createESName(PAN::getTempdataLabel(FuncName));
+
+ // On the order of operands here: think "movwf SrcReg, tmp_slot, offset".
+ if (RC == PIC16::GPRRegisterClass) {
+ //MachineFunction &MF = *MBB.getParent();
+ //MachineRegisterInfo &RI = MF.getRegInfo();
+ BuildMI(MBB, I, DL, get(PIC16::movwf))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addImm(PTLI->GetTmpOffsetForFI(FI, 1))
+ .addExternalSymbol(tmpName)
+ .addImm(1); // Emit banksel for it.
+ }
+ else if (RC == PIC16::FSR16RegisterClass) {
+ // This is a 16-bit register and the frameindex given by llvm is of
+ // size two here. Break this index N into two zero-based indexes and
+ // put one into the map; the second one is always obtained by adding 1
+ // to the first zero-based index. In fact it is going to use 3 slots,
+ // since saving an FSR corrupts W as well, so W must be saved/restored too.
+
+ unsigned opcode = (SrcReg == PIC16::FSR0) ? PIC16::save_fsr0
+ : PIC16::save_fsr1;
+ BuildMI(MBB, I, DL, get(opcode))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addImm(PTLI->GetTmpOffsetForFI(FI, 3))
+ .addExternalSymbol(tmpName)
+ .addImm(1); // Emit banksel for it.
+ }
+ else
+ assert(0 && "Can't store this register to stack slot");
+}
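The FSR16 branch above reserves three temp-data slots per frame index because spilling an FSR clobbers W as well. GetTmpOffsetForFI itself lives in PIC16TargetLowering and is not shown in this patch; as a rough sketch of the kind of bookkeeping it implies (the name, behaviour and types here are an assumption, for illustration only):

  #include <map>

  // Illustrative sketch only: hand out consecutive temp-data slots per
  // frame index, reserving 'Size' slots the first time an index is seen
  // (1 slot for a W spill, 3 for an FSR spill per the comment above).
  struct TmpSlotAllocator {
    std::map<int, unsigned> OffsetForFI; // frame index -> first slot
    unsigned NextOffset = 0;

    unsigned getTmpOffsetForFI(int FI, unsigned Size) {
      std::map<int, unsigned>::iterator It = OffsetForFI.find(FI);
      if (It != OffsetForFI.end())
        return It->second;      // this index already has its slots
      unsigned Off = NextOffset;
      OffsetForFI[FI] = Off;
      NextOffset += Size;       // reserve Size consecutive slots
      return Off;
    }
  };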
+
+void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const {
+ PIC16TargetLowering *PTLI = TM.getTargetLowering();
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ const Function *Func = MBB.getParent()->getFunction();
+ const std::string FuncName = Func->getName();
+
+ const char *tmpName = createESName(PAN::getTempdataLabel(FuncName));
+
+ // On the order of operands here: think "movf FrameIndex, W".
+ if (RC == PIC16::GPRRegisterClass) {
+ //MachineFunction &MF = *MBB.getParent();
+ //MachineRegisterInfo &RI = MF.getRegInfo();
+ BuildMI(MBB, I, DL, get(PIC16::movf), DestReg)
+ .addImm(PTLI->GetTmpOffsetForFI(FI, 1))
+ .addExternalSymbol(tmpName)
+ .addImm(1); // Emit banksel for it.
+ }
+ else if (RC == PIC16::FSR16RegisterClass) {
+ // This is a 16-bit register and the frameindex given by llvm is of
+ // size two here. Break this index N into two zero-based indexes and
+ // put one into the map; the second one is always obtained by adding 1
+ // to the first zero-based index. In fact it is going to use 3 slots,
+ // since saving an FSR corrupts W as well, so W must be saved/restored too.
+
+ unsigned opcode = (DestReg == PIC16::FSR0) ? PIC16::restore_fsr0
+ : PIC16::restore_fsr1;
+ BuildMI(MBB, I, DL, get(opcode), DestReg)
+ .addImm(PTLI->GetTmpOffsetForFI(FI, 3))
+ .addExternalSymbol(tmpName)
+ .addImm(1); // Emit banksel for it.
+ }
+ else
+ assert(0 && "Can't load this register from stack slot");
+}
+
+bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (DestRC == PIC16::FSR16RegisterClass) {
+ BuildMI(MBB, I, DL, get(PIC16::copy_fsr), DestReg).addReg(SrcReg);
+ return true;
+ }
+
+ if (DestRC == PIC16::GPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(PIC16::copy_w), DestReg).addReg(SrcReg);
+ return true;
+ }
+
+ // Not yet supported.
+ return false;
+}
+
+bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DestReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+ if (MI.getOpcode() == PIC16::copy_fsr
+ || MI.getOpcode() == PIC16::copy_w) {
+ DestReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+
+ return false;
+}
+
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
new file mode 100644
index 000000000000..0b6767969875
--- /dev/null
+++ b/lib/Target/PIC16/PIC16InstrInfo.h
@@ -0,0 +1,70 @@
+//===- PIC16InstrInfo.h - PIC16 Instruction Information----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PIC16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16INSTRUCTIONINFO_H
+#define PIC16INSTRUCTIONINFO_H
+
+#include "PIC16.h"
+#include "PIC16RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+
+class PIC16InstrInfo : public TargetInstrInfoImpl
+{
+ PIC16TargetMachine &TM;
+ const PIC16RegisterInfo RegInfo;
+public:
+ explicit PIC16InstrInfo(PIC16TargetMachine &TM);
+
+ virtual const PIC16RegisterInfo &getRegisterInfo() const { return RegInfo; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the stack slot being stored to.
+ /// If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ };
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td
new file mode 100644
index 000000000000..c572188cef22
--- /dev/null
+++ b/lib/Target/PIC16/PIC16InstrInfo.td
@@ -0,0 +1,522 @@
+//===- PIC16InstrInfo.td - PIC16 Instruction defs -------------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PIC16 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// PIC16 Specific Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Specific Type Profiles.
+//===----------------------------------------------------------------------===//
+
+// Generic type profiles for i8/i16 unary/binary operations.
+// Taking one i8 or i16 and producing void.
+def SDTI8VoidOp : SDTypeProfile<0, 1, [SDTCisI8<0>]>;
+def SDTI16VoidOp : SDTypeProfile<0, 1, [SDTCisI16<0>]>;
+
+// Taking one value and producing an output of same type.
+def SDTI8UnaryOp : SDTypeProfile<1, 1, [SDTCisI8<0>, SDTCisI8<1>]>;
+def SDTI16UnaryOp : SDTypeProfile<1, 1, [SDTCisI16<0>, SDTCisI16<1>]>;
+
+// Taking two values and producing an output of same type.
+def SDTI8BinOp : SDTypeProfile<1, 2, [SDTCisI8<0>, SDTCisI8<1>, SDTCisI8<2>]>;
+def SDTI16BinOp : SDTypeProfile<1, 2, [SDTCisI16<0>, SDTCisI16<1>,
+ SDTCisI16<2>]>;
+
+// Node specific type profiles.
+def SDT_PIC16Load : SDTypeProfile<1, 3, [SDTCisI8<0>, SDTCisI8<1>,
+ SDTCisI8<2>, SDTCisI8<3>]>;
+
+def SDT_PIC16Store : SDTypeProfile<0, 4, [SDTCisI8<0>, SDTCisI8<1>,
+ SDTCisI8<2>, SDTCisI8<3>]>;
+
+def SDT_PIC16Connect : SDTypeProfile<1, 2, [SDTCisI8<0>, SDTCisI8<1>,
+ SDTCisI8<2>]>;
+
+// PIC16ISD::CALL type profiles
+def SDT_PIC16call : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def SDT_PIC16callw : SDTypeProfile<1, -1, [SDTCisInt<0>]>;
+
+// PIC16ISD::BRCOND
+def SDT_PIC16Brcond: SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisI8<1>]>;
+
+// PIC16ISD::SELECT_ICC
+def SDT_PIC16Selecticc: SDTypeProfile<1, 3,
+ [SDTCisI8<0>, SDTCisI8<1>, SDTCisI8<2>,
+ SDTCisI8<3>]>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 addressing modes matching via DAG.
+//===----------------------------------------------------------------------===//
+def diraddr : ComplexPattern<i8, 1, "SelectDirectAddr", [], []>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def PIC16callseq_start : SDNode<"ISD::CALLSEQ_START", SDTI8VoidOp,
+ [SDNPHasChain, SDNPOutFlag]>;
+def PIC16callseq_end : SDNode<"ISD::CALLSEQ_END", SDTI8VoidOp,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+// Low 8-bits of GlobalAddress.
+def PIC16Lo : SDNode<"PIC16ISD::Lo", SDTI8BinOp>;
+
+// High 8-bits of GlobalAddress.
+def PIC16Hi : SDNode<"PIC16ISD::Hi", SDTI8BinOp>;
+
+// The MTHI and MTLO nodes are used only to match them in the incoming
+// DAG for replacement by the corresponding set_fsrhi, set_fsrlo instructions.
+// These nodes are not used for defining any instructions.
+def MTLO : SDNode<"PIC16ISD::MTLO", SDTI8UnaryOp>;
+def MTHI : SDNode<"PIC16ISD::MTHI", SDTI8UnaryOp>;
+def MTPCLATH : SDNode<"PIC16ISD::MTPCLATH", SDTI8UnaryOp>;
+
+// Node to generate Bank Select for a GlobalAddress.
+def Banksel : SDNode<"PIC16ISD::Banksel", SDTI8UnaryOp>;
+
+// Node to match a direct store operation.
+def PIC16Store : SDNode<"PIC16ISD::PIC16Store", SDT_PIC16Store, [SDNPHasChain]>;
+def PIC16StWF : SDNode<"PIC16ISD::PIC16StWF", SDT_PIC16Store,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
+// Node to match a direct load operation.
+def PIC16Load : SDNode<"PIC16ISD::PIC16Load", SDT_PIC16Load, [SDNPHasChain]>;
+def PIC16LdArg : SDNode<"PIC16ISD::PIC16LdArg", SDT_PIC16Load, [SDNPHasChain]>;
+def PIC16LdWF : SDNode<"PIC16ISD::PIC16LdWF", SDT_PIC16Load,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+def PIC16Connect: SDNode<"PIC16ISD::PIC16Connect", SDT_PIC16Connect, []>;
+
+// Node to match PIC16 call
+def PIC16call : SDNode<"PIC16ISD::CALL", SDT_PIC16call,
+ [SDNPHasChain , SDNPOptInFlag, SDNPOutFlag]>;
+def PIC16callw : SDNode<"PIC16ISD::CALLW", SDT_PIC16callw,
+ [SDNPHasChain , SDNPOptInFlag, SDNPOutFlag]>;
+
+// Node to match a comparison instruction.
+def PIC16Subcc : SDNode<"PIC16ISD::SUBCC", SDTI8BinOp, [SDNPOutFlag]>;
+
+// Node to match a conditional branch.
+def PIC16Brcond : SDNode<"PIC16ISD::BRCOND", SDT_PIC16Brcond,
+ [SDNPHasChain, SDNPInFlag]>;
+
+def PIC16Selecticc : SDNode<"PIC16ISD::SELECT_ICC", SDT_PIC16Selecticc,
+ [SDNPInFlag]>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Operand Definitions.
+//===----------------------------------------------------------------------===//
+def i8mem : Operand<i8>;
+def brtarget: Operand<OtherVT>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printCCOperand" in
+ def CCOp : Operand<i8>;
+
+include "PIC16InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// PIC16 Common Classes.
+//===----------------------------------------------------------------------===//
+
+// W = W Op F : Load the value from F and do Op to W.
+let isTwoAddress = 1, mayLoad = 1 in
+class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
+ ByteFormat<OpCode, (outs GPR:$dst),
+ (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ !strconcat(OpcStr, " $ptrlo + $offset, W"),
+ [(set GPR:$dst, (OpNode GPR:$src, (PIC16Load diraddr:$ptrlo,
+ (i8 imm:$ptrhi),
+ (i8 imm:$offset))))]>;
+
+// F = F Op W : Load the value from F, do op with W and store in F.
+// This insn class is not marked as TwoAddress because the reg is
+// being used as a source operand only. (Remember a TwoAddress insn
+// needs a copyRegToReg.)
+let mayStore = 1 in
+class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
+ ByteFormat<OpCode, (outs),
+ (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ !strconcat(OpcStr, " $ptrlo + $offset"),
+ [(PIC16Store (OpNode GPR:$src, (PIC16Load diraddr:$ptrlo,
+ (i8 imm:$ptrhi),
+ (i8 imm:$offset))),
+ diraddr:$ptrlo,
+ (i8 imm:$ptrhi), (i8 imm:$offset)
+ )]>;
+
+// W = W Op L : Do Op of L with W and place result in W.
+let isTwoAddress = 1 in
+class BinOpLW<bits<6> opcode, string OpcStr, SDNode OpNode> :
+ LiteralFormat<opcode, (outs GPR:$dst),
+ (ins GPR:$src, i8imm:$literal),
+ !strconcat(OpcStr, " $literal"),
+ [(set GPR:$dst, (OpNode GPR:$src, (i8 imm:$literal)))]>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Instructions.
+//===----------------------------------------------------------------------===//
+
+// Pseudo-instructions.
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i8imm:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(PIC16callseq_start imm:$amt)]>;
+
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i8imm:$amt),
+ "!ADJCALLSTACKUP $amt",
+ [(PIC16callseq_end imm:$amt)]>;
+
+//-----------------------------------
+// Various movlw insn patterns.
+//-----------------------------------
+let isReMaterializable = 1 in {
+// Move 8-bit literal to W.
+def movlw : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src),
+ "movlw $src",
+ [(set GPR:$dst, (i8 imm:$src))]>;
+
+// Move a Lo(TGA) to W.
+def movlw_lo_1 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
+ "movlw LOW(${src}) + ${src2}",
+ [(set GPR:$dst, (PIC16Lo tglobaladdr:$src, imm:$src2 ))]>;
+
+// Move a Lo(TES) to W.
+def movlw_lo_2 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
+ "movlw LOW(${src}) + ${src2}",
+ [(set GPR:$dst, (PIC16Lo texternalsym:$src, imm:$src2 ))]>;
+
+// Move a Hi(TGA) to W.
+def movlw_hi_1 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
+ "movlw HIGH(${src}) + ${src2}",
+ [(set GPR:$dst, (PIC16Hi tglobaladdr:$src, imm:$src2))]>;
+
+// Move a Hi(TES) to W.
+def movlw_hi_2 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2),
+ "movlw HIGH(${src}) + ${src2}",
+ [(set GPR:$dst, (PIC16Hi texternalsym:$src, imm:$src2))]>;
+}
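These four patterns materialize one byte of a 16-bit symbol address at a time through the assembler's LOW()/HIGH() operators. The split they rely on is simply the low and high byte of the address, shown here only as a reminder, not as backend code:

  #include <cstdint>

  // The byte split behind the LOW()/HIGH() movlw patterns.
  uint8_t lowByte(uint16_t Addr)  { return Addr & 0xFF; }
  uint8_t highByte(uint16_t Addr) { return (Addr >> 8) & 0xFF; }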
+
+//-------------------
+// FSR setting insns.
+//-------------------
+// These insns are matched via a DAG replacement pattern.
+def set_fsrlo:
+ ByteFormat<0, (outs FSR16:$fsr),
+ (ins GPR:$val),
+ "movwf ${fsr}L",
+ []>;
+
+let isTwoAddress = 1 in
+def set_fsrhi:
+ ByteFormat<0, (outs FSR16:$dst),
+ (ins FSR16:$src, GPR:$val),
+ "movwf ${dst}H",
+ []>;
+
+def set_pclath:
+ ByteFormat<0, (outs PCLATHR:$dst),
+ (ins GPR:$val),
+ "movwf ${dst}",
+ [(set PCLATHR:$dst , (MTPCLATH GPR:$val))]>;
+
+//----------------------------
+// copyRegToReg
+// copyRegToReg insns. These are dummies; they should always be deleted
+// by the optimizer and never be present in the final generated code.
+// If they are, then we have to write correct macros for these insns.
+//----------------------------
+def copy_fsr:
+ Pseudo<(outs FSR16:$dst), (ins FSR16:$src), "copy_fsr $dst, $src", []>;
+
+def copy_w:
+ Pseudo<(outs GPR:$dst), (ins GPR:$src), "copy_w $dst, $src", []>;
+
+class SAVE_FSR<string OpcStr>:
+ Pseudo<(outs),
+ (ins FSR16:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ !strconcat(OpcStr, " $ptrlo, $offset"),
+ []>;
+
+def save_fsr0: SAVE_FSR<"save_fsr0">;
+def save_fsr1: SAVE_FSR<"save_fsr1">;
+
+class RESTORE_FSR<string OpcStr>:
+ Pseudo<(outs FSR16:$dst),
+ (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ !strconcat(OpcStr, " $ptrlo, $offset"),
+ []>;
+
+def restore_fsr0: RESTORE_FSR<"restore_fsr0">;
+def restore_fsr1: RESTORE_FSR<"restore_fsr1">;
+
+//--------------------------
+// Store to memory
+//-------------------------
+
+// Direct store.
+// Input operands are: val = W, ptrlo = GA, offset = offset, ptrhi = banksel.
+let mayStore = 1 in
+class MOVWF_INSN<bits<6> OpCode, SDNode OpNodeDest, SDNode Op>:
+ ByteFormat<0, (outs),
+ (ins GPR:$val, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ "movwf ${ptrlo} + ${offset}",
+ [(Op GPR:$val, OpNodeDest:$ptrlo, (i8 imm:$ptrhi),
+ (i8 imm:$offset))]>;
+
+// Store W to a Global Address.
+def movwf : MOVWF_INSN<0, tglobaladdr, PIC16Store>;
+
+// Store W to an External Symbol.
+def movwf_1 : MOVWF_INSN<0, texternalsym, PIC16Store>;
+
+// Store with InFlag and OutFlag
+// This is the same as movwf_1 but has a flag. A flag is required to
+// order the stores while passing the params to a function.
+def movwf_2 : MOVWF_INSN<0, texternalsym, PIC16StWF>;
+
+// Indirect store. Matched via a DAG replacement pattern.
+def store_indirect :
+ ByteFormat<0, (outs),
+ (ins GPR:$val, FSR16:$fsr, i8imm:$offset),
+ "movwi $offset[$fsr]",
+ []>;
+
+//----------------------------
+// Load from memory
+//----------------------------
+// Direct load.
+// Input Operands are: ptrlo = GA, offset = offset, ptrhi = banksel.
+// Output: dst = W
+let mayLoad = 1 in
+class MOVF_INSN<bits<6> OpCode, SDNode OpNodeSrc, SDNode Op>:
+ ByteFormat<0, (outs GPR:$dst),
+ (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ "movf ${ptrlo} + ${offset}, W",
+ [(set GPR:$dst,
+ (Op OpNodeSrc:$ptrlo, (i8 imm:$ptrhi),
+ (i8 imm:$offset)))]>;
+
+// Load from a GA.
+def movf : MOVF_INSN<0, tglobaladdr, PIC16Load>;
+
+// Load from an ES.
+def movf_1 : MOVF_INSN<0, texternalsym, PIC16Load>;
+def movf_1_1 : MOVF_INSN<0, texternalsym, PIC16LdArg>;
+
+// Load with InFlag and OutFlag
+// This is the same as movf_1 but has a flag. A flag is required to
+// order the loads while copying the return value of a function.
+def movf_2 : MOVF_INSN<0, texternalsym, PIC16LdWF>;
+
+// Indirect load. Matched via a DAG replacement pattern.
+def load_indirect :
+ ByteFormat<0, (outs GPR:$dst),
+ (ins FSR16:$fsr, i8imm:$offset),
+ "moviw $offset[$fsr]",
+ []>;
+
+//-------------------------
+// Bitwise operations patterns
+//--------------------------
+// W = W op [F]
+let Defs = [STATUS] in {
+def OrFW : BinOpFW<0, "iorwf", or>;
+def XOrFW : BinOpFW<0, "xorwf", xor>;
+def AndFW : BinOpFW<0, "andwf", and>;
+
+// F = W op [F]
+def OrWF : BinOpWF<0, "iorwf", or>;
+def XOrWF : BinOpWF<0, "xorwf", xor>;
+def AndWF : BinOpWF<0, "andwf", and>;
+
+//-------------------------
+// Various add/sub patterns.
+//-------------------------
+
+// W = W + [F]
+def addfw_1: BinOpFW<0, "addwf", add>;
+def addfw_2: BinOpFW<0, "addwf", addc>;
+
+let Uses = [STATUS] in
+def addfwc: BinOpFW<0, "addwfc", adde>; // With Carry.
+
+// F = W + [F]
+def addwf_1: BinOpWF<0, "addwf", add>;
+def addwf_2: BinOpWF<0, "addwf", addc>;
+let Uses = [STATUS] in
+def addwfc: BinOpWF<0, "addwfc", adde>; // With Carry.
+}
+
+// W -= [F] ; load from F and sub the value from W.
+let isTwoAddress = 1, mayLoad = 1 in
+class SUBFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
+ ByteFormat<OpCode, (outs GPR:$dst),
+ (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ !strconcat(OpcStr, " $ptrlo + $offset, W"),
+ [(set GPR:$dst, (OpNode (PIC16Load diraddr:$ptrlo,
+ (i8 imm:$ptrhi), (i8 imm:$offset)),
+ GPR:$src))]>;
+let Defs = [STATUS] in {
+def subfw_1: SUBFW<0, "subwf", sub>;
+def subfw_2: SUBFW<0, "subwf", subc>;
+
+let Uses = [STATUS] in
+def subfwb: SUBFW<0, "subwfb", sube>; // With Borrow.
+
+def subfw_cc: SUBFW<0, "subwf", PIC16Subcc>;
+}
+
+// [F] -= W ;
+let mayStore = 1 in
+class SUBWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
+ ByteFormat<OpCode, (outs),
+ (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
+ !strconcat(OpcStr, " $ptrlo + $offset"),
+ [(PIC16Store (OpNode (PIC16Load diraddr:$ptrlo,
+ (i8 imm:$ptrhi), (i8 imm:$offset)),
+ GPR:$src), diraddr:$ptrlo,
+ (i8 imm:$ptrhi), (i8 imm:$offset))]>;
+
+let Defs = [STATUS] in {
+def subwf_1: SUBWF<0, "subwf", sub>;
+def subwf_2: SUBWF<0, "subwf", subc>;
+
+let Uses = [STATUS] in
+ def subwfb: SUBWF<0, "subwfb", sube>; // With Borrow.
+
+def subwf_cc: SUBWF<0, "subwf", PIC16Subcc>;
+}
+
+// addlw
+let Defs = [STATUS] in {
+def addlw_1 : BinOpLW<0, "addlw", add>;
+def addlw_2 : BinOpLW<0, "addlw", addc>;
+
+let Uses = [STATUS] in
+def addlwc : BinOpLW<0, "addlwc", adde>; // With Carry. (Assembler macro).
+
+// bitwise operations involving a literal and w.
+def andlw : BinOpLW<0, "andlw", and>;
+def xorlw : BinOpLW<0, "xorlw", xor>;
+def orlw : BinOpLW<0, "iorlw", or>;
+}
+
+// sublw
+// W = C - W ; sub W from literal. (Without borrow).
+let isTwoAddress = 1 in
+class SUBLW<bits<6> opcode, SDNode OpNode> :
+ LiteralFormat<opcode, (outs GPR:$dst),
+ (ins GPR:$src, i8imm:$literal),
+ "sublw $literal",
+ [(set GPR:$dst, (OpNode (i8 imm:$literal), GPR:$src))]>;
+
+let Defs = [STATUS] in {
+def sublw_1 : SUBLW<0, sub>;
+def sublw_2 : SUBLW<0, subc>;
+def sublw_cc : SUBLW<0, PIC16Subcc>;
+}
+
+// Call instruction.
+let isCall = 1,
+ Defs = [W, FSR0, FSR1] in {
+ def CALL: LiteralFormat<0x1, (outs), (ins i8imm:$func),
+ //"call ${func} + 2",
+ "call ${func}",
+ [(PIC16call diraddr:$func)]>;
+}
+
+let isCall = 1,
+ Defs = [W, FSR0, FSR1] in {
+ def CALL_1: LiteralFormat<0x1, (outs), (ins GPR:$func, PCLATHR:$pc),
+ "callw",
+ [(PIC16call (PIC16Connect GPR:$func, PCLATHR:$pc))]>;
+}
+
+let isCall = 1,
+ Defs = [FSR0, FSR1] in {
+ def CALLW: LiteralFormat<0x1, (outs GPR:$dest),
+ (ins GPR:$func, PCLATHR:$pc),
+ "callw",
+ [(set GPR:$dest, (PIC16callw (PIC16Connect GPR:$func, PCLATHR:$pc)))]>;
+}
+
+let Uses = [STATUS], isBranch = 1, isTerminator = 1, hasDelaySlot = 0 in
+def pic16brcond: ControlFormat<0x0, (outs), (ins brtarget:$dst, CCOp:$cc),
+ "b$cc $dst",
+ [(PIC16Brcond bb:$dst, imm:$cc)]>;
+
+// Unconditional branch.
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 0 in
+def br_uncond: ControlFormat<0x0, (outs), (ins brtarget:$dst),
+ "goto $dst",
+ [(br bb:$dst)]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
+// scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+ def SELECT_CC_Int_ICC
+ : Pseudo<(outs GPR:$dst), (ins GPR:$T, GPR:$F, i8imm:$Cond),
+ "; SELECT_CC_Int_ICC PSEUDO!",
+ [(set GPR:$dst, (PIC16Selecticc GPR:$T, GPR:$F,
+ imm:$Cond))]>;
+}
+
+
+// Banksel.
+def banksel :
+ Pseudo<(outs),
+ (ins i8mem:$ptr),
+ "banksel $ptr",
+ []>;
+
+def pagesel :
+ Pseudo<(outs),
+ (ins i8mem:$ptr),
+ "movlp $ptr",
+ []>;
+
+
+// Return insn.
+def Return :
+ ControlFormat<0, (outs), (ins), "return", [(ret)]>;
+
+//===----------------------------------------------------------------------===//
+// PIC16 Replacement Patterns.
+//===----------------------------------------------------------------------===//
+
+// Identify an indirect store and select insns for it.
+def : Pat<(PIC16Store GPR:$val, (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
+ imm:$offset),
+ (store_indirect GPR:$val,
+ (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
+ imm:$offset)>;
+
+def : Pat<(PIC16StWF GPR:$val, (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
+ imm:$offset),
+ (store_indirect GPR:$val,
+ (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
+ imm:$offset)>;
+
+// Identify an indirect load and select insns for it.
+def : Pat<(PIC16Load (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
+ imm:$offset),
+ (load_indirect (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
+ imm:$offset)>;
+
+def : Pat<(PIC16LdWF (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr),
+ imm:$offset),
+ (load_indirect (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr),
+ imm:$offset)>;
+
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
new file mode 100644
index 000000000000..20f926def398
--- /dev/null
+++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp
@@ -0,0 +1,169 @@
+//===-- PIC16MemSelOpt.cpp - PIC16 banksel optimizer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which optimizes the emitting of banksel
+// instructions before accessing data memory. This currently works within
+// a basic block only and keeps track of the last accessed memory bank.
+// If memory access continues to be in the same bank, it simply changes the
+// banksel immediate, which is a part of the insn accessing the data memory,
+// from 1 to zero. The asm printer emits a banksel only if that immediate is 1.
+//
+// FIXME: tracking banks across basic blocks is not implemented yet. The
+// banksel pass only works on local basic blocks.
+//
+//===----------------------------------------------------------------------===//
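The whole pass boils down to tracking one "current bank" per basic block and emitting a banksel only when an access switches banks. A minimal standalone sketch of that idea (the names here are invented for the example and are not the pass's actual interface):

  #include <string>
  #include <vector>

  // Illustrative sketch: given the sequence of banks (section names)
  // touched by a block, emit "banksel" only when the bank changes.
  std::vector<std::string> planBanksels(const std::vector<std::string> &Banks) {
    std::vector<std::string> Out;
    std::string CurBank; // empty: no bank selected on block entry
    for (size_t i = 0; i < Banks.size(); ++i) {
      if (Banks[i] != CurBank) {
        Out.push_back("banksel " + Banks[i]);
        CurBank = Banks[i];
      }
      Out.push_back("access " + Banks[i]);
    }
    return Out;
  }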
+
+#define DEBUG_TYPE "pic16-codegen"
+#include "PIC16.h"
+#include "PIC16InstrInfo.h"
+#include "PIC16TargetAsmInfo.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+ struct VISIBILITY_HIDDEN MemSelOpt : public MachineFunctionPass {
+ static char ID;
+ MemSelOpt() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PIC16 Memsel Optimizer";
+ }
+
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+ bool processInstruction(MachineInstr *MI);
+
+ private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+ MachineBasicBlock *MBB; // Current basic block
+ std::string CurBank;
+
+ };
+ char MemSelOpt::ID = 0;
+}
+
+FunctionPass *llvm::createPIC16MemSelOptimizerPass() {
+ return new MemSelOpt();
+}
+
+
+/// runOnMachineFunction - Loop over all of the basic blocks, optimizing the
+/// emission of banksel instructions in each of them.
+///
+bool MemSelOpt::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ Changed |= processBasicBlock(MF, *I);
+ }
+
+ return Changed;
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// tracking the currently selected bank and eliding redundant banksels.
+///
+bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
+ bool Changed = false;
+ MBB = &BB;
+
+ // Let us assume that when entering a basic block no bank is selected.
+ // Ideally we should look at the predecessors for this information.
+ CurBank="";
+
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ Changed |= processInstruction(I);
+ }
+ return Changed;
+}
+
+bool MemSelOpt::processInstruction(MachineInstr *MI) {
+ bool Changed = false;
+
+ unsigned NumOperands = MI->getNumOperands();
+ if (NumOperands == 0) return false;
+
+
+ // If this insn is not going to access any memory, return.
+ const TargetInstrDesc &TID = TII->get(MI->getOpcode());
+ if (! (TID.isCall() || TID.mayLoad() || TID.mayStore()))
+ return false;
+
+ // Scan for the memory address operand.
+ // FIXME: Should we use standard interfaces like memoperands_iterator,
+ // hasMemOperand() etc ?
+ int MemOpPos = -1;
+ for (unsigned i = 0; i < NumOperands; i++) {
+ MachineOperand Op = MI->getOperand(i);
+ if (Op.getType() == MachineOperand::MO_GlobalAddress ||
+ Op.getType() == MachineOperand::MO_ExternalSymbol) {
+ // We found one mem operand. Next one should be BS.
+ MemOpPos = i;
+ break;
+ }
+ }
+
+ // If we did not find an operand accessing memory, we are done with this insn.
+ if (MemOpPos == -1) return Changed;
+
+ // Get the MemOp.
+ MachineOperand &Op = MI->getOperand(MemOpPos);
+
+ // If this is pagesel material (a call), handle it first.
+ if (MI->getOpcode() == PIC16::CALL) {
+ DebugLoc dl = MI->getDebugLoc();
+ BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).
+ addOperand(Op);
+ return true;
+ }
+
+ // Get the section name (NewBank) for MemOp.
+ // This assumes that the section names for globals are already set by
+ // AsmPrinter->doInitialization.
+ std::string NewBank = CurBank;
+ if (Op.getType() == MachineOperand::MO_GlobalAddress &&
+ Op.getGlobal()->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) {
+ NewBank = Op.getGlobal()->getSection();
+ } else if (Op.getType() == MachineOperand::MO_ExternalSymbol) {
+ // External Symbol is generated for temp data and arguments. They are
+ // in fpdata.<functionname>.# section.
+ std::string Sym = Op.getSymbolName();
+ NewBank = PAN::getSectionNameForSym(Sym);
+ }
+
+ // If the previous and new section names are the same, we don't need to
+ // emit banksel.
+ if (NewBank.compare(CurBank) != 0 ) {
+ DebugLoc dl = MI->getDebugLoc();
+ BuildMI(*MBB, MI, dl, TII->get(PIC16::banksel)).
+ addOperand(Op);
+ Changed = true;
+ CurBank = NewBank;
+ }
+
+ return Changed;
+}
+
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp
new file mode 100644
index 000000000000..eb758d8543d0
--- /dev/null
+++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp
@@ -0,0 +1,91 @@
+//===- PIC16RegisterInfo.cpp - PIC16 Register Information -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PIC16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pic16-reg-info"
+
+#include "PIC16.h"
+#include "PIC16RegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+
+
+using namespace llvm;
+
+PIC16RegisterInfo::PIC16RegisterInfo(const TargetInstrInfo &tii,
+ const PIC16Subtarget &st)
+ : PIC16GenRegisterInfo(PIC16::ADJCALLSTACKDOWN, PIC16::ADJCALLSTACKUP),
+ TII(tii),
+ ST(st) {}
+
+#include "PIC16GenRegisterInfo.inc"
+
+/// PIC16 Callee Saved Registers
+const unsigned* PIC16RegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const unsigned CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs;
+}
+
+// PIC16 Callee Saved Reg Classes
+const TargetRegisterClass* const*
+PIC16RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+ return CalleeSavedRegClasses;
+}
+
+BitVector PIC16RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ return Reserved;
+}
+
+bool PIC16RegisterInfo::hasFP(const MachineFunction &MF) const {
+ return false;
+}
+
+void PIC16RegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const
+{ /* NOT YET IMPLEMENTED */ }
+
+void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const
+{ /* NOT YET IMPLEMENTED */ }
+
+void PIC16RegisterInfo::
+emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{ /* NOT YET IMPLEMENTED */ }
+
+int PIC16RegisterInfo::
+getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "Not keeping track of debug information yet!!");
+ return -1;
+}
+
+unsigned PIC16RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ assert(0 && "PIC16 Does not have any frame register");
+ return 0;
+}
+
+unsigned PIC16RegisterInfo::getRARegister() const {
+ assert(0 && "PIC16 Does not have any return address register");
+ return 0;
+}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void PIC16RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN,
+ // ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h
new file mode 100644
index 000000000000..83689d0486b1
--- /dev/null
+++ b/lib/Target/PIC16/PIC16RegisterInfo.h
@@ -0,0 +1,68 @@
+//===- PIC16RegisterInfo.h - PIC16 Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PIC16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16REGISTERINFO_H
+#define PIC16REGISTERINFO_H
+
+#include "PIC16GenRegisterInfo.h.inc"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+
+// Forward Declarations.
+ class PIC16Subtarget;
+ class TargetInstrInfo;
+
+class PIC16RegisterInfo : public PIC16GenRegisterInfo {
+ private:
+ const TargetInstrInfo &TII;
+ const PIC16Subtarget &ST;
+
+ public:
+ PIC16RegisterInfo(const TargetInstrInfo &tii,
+ const PIC16Subtarget &st);
+
+
+ //------------------------------------------------------
+ // Pure virtual functions from TargetRegisterInfo
+ //------------------------------------------------------
+
+ // PIC16 callee saved registers
+ virtual const unsigned*
+ getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ // PIC16 callee saved register classes
+ virtual const TargetRegisterClass* const *
+ getCalleeSavedRegClasses(const MachineFunction *MF) const;
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+ virtual bool hasFP(const MachineFunction &MF) const;
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS=NULL) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+ virtual unsigned getFrameRegister(MachineFunction &MF) const;
+ virtual unsigned getRARegister() const;
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.td b/lib/Target/PIC16/PIC16RegisterInfo.td
new file mode 100644
index 000000000000..2959d912ec32
--- /dev/null
+++ b/lib/Target/PIC16/PIC16RegisterInfo.td
@@ -0,0 +1,33 @@
+//===- PIC16RegisterInfo.td - PIC16 Register defs ------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PIC16 register file
+//===----------------------------------------------------------------------===//
+
+class PIC16Reg<string n> : Register<n> {
+ let Namespace = "PIC16";
+}
+
+// PIC16 Registers.
+def W : PIC16Reg<"W">;
+def FSR0 : PIC16Reg<"FSR0">;
+def FSR1 : PIC16Reg<"FSR1">;
+def BS : PIC16Reg<"BS">;
+def PCLATH : PIC16Reg<"PCLATH">;
+
+def STATUS : PIC16Reg<"STATUS">;
+
+// PIC16 Register classes.
+def GPR : RegisterClass<"PIC16", [i8], 8, [W]>;
+def FSR16 : RegisterClass<"PIC16", [i16], 8, [FSR0, FSR1]>;
+def BSR : RegisterClass<"PIC16", [i8], 8, [BS]>;
+def PCLATHR : RegisterClass<"PIC16", [i8], 8, [PCLATH]>;
+def STATUSR : RegisterClass<"PIC16", [i8], 8, [STATUS]>;
+
diff --git a/lib/Target/PIC16/PIC16Subtarget.cpp b/lib/Target/PIC16/PIC16Subtarget.cpp
new file mode 100644
index 000000000000..db8a5d84a4bf
--- /dev/null
+++ b/lib/Target/PIC16/PIC16Subtarget.cpp
@@ -0,0 +1,27 @@
+//===- PIC16Subtarget.cpp - PIC16 Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PIC16 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16Subtarget.h"
+#include "PIC16GenSubtarget.inc"
+
+using namespace llvm;
+
+PIC16Subtarget::PIC16Subtarget(const Module &M, const std::string &FS,
+ bool Cooper)
+ :IsCooper(Cooper)
+{
+ std::string CPU = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/PIC16/PIC16Subtarget.h b/lib/Target/PIC16/PIC16Subtarget.h
new file mode 100644
index 000000000000..e5147a0cf892
--- /dev/null
+++ b/lib/Target/PIC16/PIC16Subtarget.h
@@ -0,0 +1,45 @@
+//=====-- PIC16Subtarget.h - Define Subtarget for the PIC16 ---*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PIC16 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16SUBTARGET_H
+#define PIC16SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+class PIC16Subtarget : public TargetSubtarget {
+
+ // IsCooper - Target ISA is Cooper.
+ bool IsCooper;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ PIC16Subtarget(const Module &M, const std::string &FS, bool Cooper);
+
+ /// isCooper - Returns true if the target ISA is Cooper.
+ bool isCooper() const { return IsCooper; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+};
+} // End llvm namespace
+
+#endif // PIC16SUBTARGET_H
diff --git a/lib/Target/PIC16/PIC16TargetAsmInfo.cpp b/lib/Target/PIC16/PIC16TargetAsmInfo.cpp
new file mode 100644
index 000000000000..d2657f018f88
--- /dev/null
+++ b/lib/Target/PIC16/PIC16TargetAsmInfo.cpp
@@ -0,0 +1,264 @@
+//===-- PIC16TargetAsmInfo.cpp - PIC16 asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PIC16TargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16TargetAsmInfo.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+
+using namespace llvm;
+
+PIC16TargetAsmInfo::
+PIC16TargetAsmInfo(const PIC16TargetMachine &TM)
+ : TargetAsmInfo(TM) {
+ CommentString = ";";
+ GlobalPrefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
+ GlobalDirective = "\tglobal\t";
+ ExternDirective = "\textern\t";
+
+ Data8bitsDirective = " db ";
+ Data16bitsDirective = " dw ";
+ Data32bitsDirective = " dl ";
+ RomData8bitsDirective = " dw ";
+ RomData16bitsDirective = " rom_di ";
+ RomData32bitsDirective = " rom_dl ";
+ ZeroDirective = NULL;
+ AsciiDirective = " dt ";
+ AscizDirective = NULL;
+ BSSSection_ = getNamedSection("udata.# UDATA",
+ SectionFlags::Writeable | SectionFlags::BSS);
+ ReadOnlySection = getNamedSection("romdata.# ROMDATA", SectionFlags::None);
+ DataSection = getNamedSection("idata.# IDATA", SectionFlags::Writeable);
+ SwitchToSectionDirective = "";
+ // Needed because otherwise a .text symbol is emitted by DwarfWriter
+ // in BeginModule, and gpasm complains about that .text symbol.
+ TextSection = getUnnamedSection("", SectionFlags::Code);
+ ROSection = new PIC16Section(getReadOnlySection());
+ ExternalVarDecls = new PIC16Section(getNamedSection("ExternalVarDecls"));
+ ExternalVarDefs = new PIC16Section(getNamedSection("ExternalVarDefs"));
+ // Set it to false because we need to generate the c file name and not the
+ // bc file name.
+ HasSingleParameterDotFile = false;
+}
+
+const char *PIC16TargetAsmInfo::getRomDirective(unsigned size) const
+{
+ if (size == 8)
+ return RomData8bitsDirective;
+ else if (size == 16)
+ return RomData16bitsDirective;
+ else if (size == 32)
+ return RomData32bitsDirective;
+ else
+ return NULL;
+}
+
+
+const char *PIC16TargetAsmInfo::getASDirective(unsigned size,
+ unsigned AS) const {
+ if (AS == PIC16ISD::ROM_SPACE)
+ return getRomDirective(size);
+ else
+ return NULL;
+}
+
+const Section *
+PIC16TargetAsmInfo::getBSSSectionForGlobal(const GlobalVariable *GV) const {
+ assert (GV->hasInitializer() && "This global doesn't need space");
+ Constant *C = GV->getInitializer();
+ assert (C->isNullValue() && "Uninitialized global has non-zero initializer");
+
+ // Find how much space this global needs.
+ const TargetData *TD = TM.getTargetData();
+ const Type *Ty = C->getType();
+ unsigned ValSize = TD->getTypeAllocSize(Ty);
+
+ // Go through all BSS Sections and assign this variable
+ // to the first available section having enough space.
+ PIC16Section *FoundBSS = NULL;
+ for (unsigned i = 0; i < BSSSections.size(); i++) {
+ if (DataBankSize - BSSSections[i]->Size >= ValSize) {
+ FoundBSS = BSSSections[i];
+ break;
+ }
+ }
+
+ // No BSS section spacious enough was found. Create a new one.
+ if (! FoundBSS) {
+ std::string name = PAN::getUdataSectionName(BSSSections.size());
+ const Section *NewSection = getNamedSection (name.c_str());
+
+ FoundBSS = new PIC16Section(NewSection);
+
+ // Add this newly created BSS section to the list of BSSSections.
+ BSSSections.push_back(FoundBSS);
+ }
+
+ // Insert the GV into this BSS.
+ FoundBSS->Items.push_back(GV);
+ FoundBSS->Size += ValSize;
+
+ // We can't do this here because GV is const .
+ // const std::string SName = FoundBSS->S_->getName();
+ // GV->setSection(SName);
+
+ return FoundBSS->S_;
+}
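The loop above is a first-fit packer: a global lands in the first udata section that still has room within an 80-byte bank (DataBankSize), and a fresh section is created when none does. The policy in isolation, with invented names, purely for illustration:

  #include <vector>

  // Illustrative sketch: first-fit packing of object sizes into banks
  // of DataBankSize (80) bytes; returns the index of the chosen bank.
  unsigned assignToBank(std::vector<unsigned> &BankUsed, unsigned ObjSize,
                        unsigned BankSize = 80) {
    for (unsigned i = 0; i < BankUsed.size(); ++i)
      if (BankSize - BankUsed[i] >= ObjSize) {
        BankUsed[i] += ObjSize;   // reuse an existing bank
        return i;
      }
    BankUsed.push_back(ObjSize);  // open a new bank
    return unsigned(BankUsed.size() - 1);
  }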
+
+const Section *
+PIC16TargetAsmInfo::getIDATASectionForGlobal(const GlobalVariable *GV) const {
+ assert (GV->hasInitializer() && "This global doesn't need space");
+ Constant *C = GV->getInitializer();
+ assert (!C->isNullValue() && "Initialized global has zero initializer");
+ assert (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE &&
+ "can split initialized RAM data only");
+
+ // Find how much space this global needs.
+ const TargetData *TD = TM.getTargetData();
+ const Type *Ty = C->getType();
+ unsigned ValSize = TD->getTypeAllocSize(Ty);
+
+ // Go through all IDATA Sections and assign this variable
+ // to the first available section having enough space.
+ PIC16Section *FoundIDATA = NULL;
+ for (unsigned i = 0; i < IDATASections.size(); i++) {
+ if ( DataBankSize - IDATASections[i]->Size >= ValSize) {
+ FoundIDATA = IDATASections[i];
+ break;
+ }
+ }
+
+ // No IDATA section spacious enough was found. Create a new one.
+ if (! FoundIDATA) {
+ std::string name = PAN::getIdataSectionName(IDATASections.size());
+ const Section *NewSection = getNamedSection (name.c_str());
+
+ FoundIDATA = new PIC16Section(NewSection);
+
+ // Add this newly created IDATA section to the list of IDATASections.
+ IDATASections.push_back(FoundIDATA);
+ }
+
+ // Insert the GV into this IDATA.
+ FoundIDATA->Items.push_back(GV);
+ FoundIDATA->Size += ValSize;
+
+ // We can't do this here because GV is const .
+ // GV->setSection(FoundIDATA->S->getName());
+
+ return FoundIDATA->S_;
+}
+
+// Get the section for an automatic variable of a function.
+// For PIC16 they are globals only with mangled names.
+const Section *
+PIC16TargetAsmInfo::getSectionForAuto(const GlobalVariable *GV) const {
+
+ const std::string name = PAN::getSectionNameForSym(GV->getName());
+
+ // Go through all Auto Sections and assign this variable
+ // to the appropriate section.
+ PIC16Section *FoundAutoSec = NULL;
+ for (unsigned i = 0; i < AutosSections.size(); i++) {
+ if ( AutosSections[i]->S_->getName() == name) {
+ FoundAutoSec = AutosSections[i];
+ break;
+ }
+ }
+
+ // No Auto section was found. Create a new one.
+ if (! FoundAutoSec) {
+ const Section *NewSection = getNamedSection (name.c_str());
+
+ FoundAutoSec = new PIC16Section(NewSection);
+
+ // Add this newly created autos section to the list of AutosSections.
+ AutosSections.push_back(FoundAutoSec);
+ }
+
+ // Insert the auto into this section.
+ FoundAutoSec->Items.push_back(GV);
+
+ return FoundAutoSec->S_;
+}
+
+
+// Override default implementation to put the true globals into
+// multiple data sections if required.
+const Section*
+PIC16TargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV1) const {
+ // We select the section based on the initializer here, so it really
+ // has to be a GlobalVariable.
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(GV1);
+
+ if (!GV)
+ return TargetAsmInfo::SelectSectionForGlobal(GV1);
+
+ // Record External Var Decls.
+ if (GV->isDeclaration()) {
+ ExternalVarDecls->Items.push_back(GV);
+ return ExternalVarDecls->S_;
+ }
+
+ assert (GV->hasInitializer() && "A def without initializer?");
+
+ // First, if this is an automatic variable for a function, get the section
+ // name for it and return.
+ const std::string name = GV->getName();
+ if (PAN::isLocalName(name)) {
+ return getSectionForAuto(GV);
+ }
+
+ // Record External Var Defs.
+ if (GV->hasExternalLinkage() || GV->hasCommonLinkage()) {
+ ExternalVarDefs->Items.push_back(GV);
+ }
+
+ // See if this is an uninitialized global.
+ const Constant *C = GV->getInitializer();
+ if (C->isNullValue())
+ return getBSSSectionForGlobal(GV);
+
+ // If this is initialized data in RAM, put it in the correct IDATA section.
+ if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE)
+ return getIDATASectionForGlobal(GV);
+
+ // This is initialized data in rom, put it in the readonly section.
+ if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE) {
+ ROSection->Items.push_back(GV);
+ return ROSection->S_;
+ }
+
+ // Else let the default implementation take care of it.
+ return TargetAsmInfo::SelectSectionForGlobal(GV);
+}
+
+PIC16TargetAsmInfo::~PIC16TargetAsmInfo() {
+
+ for (unsigned i = 0; i < BSSSections.size(); i++) {
+ delete BSSSections[i];
+ }
+
+ for (unsigned i = 0; i < IDATASections.size(); i++) {
+ delete IDATASections[i];
+ }
+
+ for (unsigned i = 0; i < AutosSections.size(); i++) {
+ delete AutosSections[i];
+ }
+
+ delete ROSection;
+ delete ExternalVarDecls;
+ delete ExternalVarDefs;
+}
diff --git a/lib/Target/PIC16/PIC16TargetAsmInfo.h b/lib/Target/PIC16/PIC16TargetAsmInfo.h
new file mode 100644
index 000000000000..e464e36f7887
--- /dev/null
+++ b/lib/Target/PIC16/PIC16TargetAsmInfo.h
@@ -0,0 +1,79 @@
+//=====-- PIC16TargetAsmInfo.h - PIC16 asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the PIC16TargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16TARGETASMINFO_H
+#define PIC16TARGETASMINFO_H
+
+#include "PIC16.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include <vector>
+#include "llvm/Module.h"
+#define DataBankSize 80
+namespace llvm {
+
+ // Forward declaration.
+ class PIC16TargetMachine;
+ class GlobalVariable;
+
+ // PIC16 splits the global data into multiple udata and idata sections.
+ // Each udata and idata section needs to contain a list of the globals it
+ // holds, in order to avoid scanning over all the global values again and
+ // printing only those that match the current section.
+ // Keeping values inside the sections makes printing a section much easier.
+ struct PIC16Section {
+ const Section *S_; // Connection to actual Section.
+ unsigned Size; // Total size of the objects contained.
+ std::vector<const GlobalVariable*> Items;
+
+ PIC16Section (const Section *s) { S_ = s; Size = 0; }
+ };
+
+ struct PIC16TargetAsmInfo : public TargetAsmInfo {
+ std::string getSectionNameForSym(const std::string &Sym) const;
+ PIC16TargetAsmInfo(const PIC16TargetMachine &TM);
+ mutable std::vector<PIC16Section *> BSSSections;
+ mutable std::vector<PIC16Section *> IDATASections;
+ mutable std::vector<PIC16Section *> AutosSections;
+ mutable PIC16Section *ROSection;
+ mutable PIC16Section *ExternalVarDecls;
+ mutable PIC16Section *ExternalVarDefs;
+ virtual ~PIC16TargetAsmInfo();
+
+ private:
+ const char *RomData8bitsDirective;
+ const char *RomData16bitsDirective;
+ const char *RomData32bitsDirective;
+ const char *getRomDirective(unsigned size) const;
+ virtual const char *getASDirective(unsigned size, unsigned AS) const;
+ const Section *getBSSSectionForGlobal(const GlobalVariable *GV) const;
+ const Section *getIDATASectionForGlobal(const GlobalVariable *GV) const;
+ const Section *getSectionForAuto(const GlobalVariable *GV) const;
+ virtual const Section *SelectSectionForGlobal(const GlobalValue *GV) const;
+
+
+ public:
+ void SetSectionForGVs(Module &M);
+ std::vector<PIC16Section *> getBSSSections() const {
+ return BSSSections;
+ }
+ std::vector<PIC16Section *> getIDATASections() const {
+ return IDATASections;
+ }
+ std::vector<PIC16Section *> getAutosSections() const {
+ return AutosSections;
+ }
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
new file mode 100644
index 000000000000..bda632608ea7
--- /dev/null
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -0,0 +1,79 @@
+//===-- PIC16TargetMachine.cpp - Define TargetMachine for PIC16 -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PIC16 target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16.h"
+#include "PIC16TargetAsmInfo.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+
+using namespace llvm;
+
+/// PIC16TargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int PIC16TargetMachineModule;
+int PIC16TargetMachineModule = 0;
+
+
+// Register the targets
+static RegisterTarget<PIC16TargetMachine>
+X("pic16", "PIC16 14-bit [experimental].");
+static RegisterTarget<CooperTargetMachine>
+Y("cooper", "PIC16 Cooper [experimental].");
+
+// PIC16TargetMachine - Traditional PIC16 Machine.
+PIC16TargetMachine::PIC16TargetMachine(const Module &M, const std::string &FS,
+ bool Cooper)
+: Subtarget(M, FS, Cooper),
+ DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"),
+ InstrInfo(*this), TLInfo(*this),
+ FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { }
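For readers unfamiliar with LLVM data layout strings, the one passed above decodes as follows (standard data-layout syntax, restated here rather than introduced by this patch):

  e         little-endian
  p:16:8:8  pointers are 16 bits wide, with 8-bit ABI and preferred alignment
  i8:8:8    i8 has 8-bit ABI and preferred alignment
  i16:8:8   i16 is only byte-aligned (no natural 16-bit alignment)
  i32:8:8   i32 is likewise only byte-aligned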
+
+// CooperTargetMachine - Uses the same PIC16TargetMachine, but makes IsCooper
+// as true.
+CooperTargetMachine::CooperTargetMachine(const Module &M, const std::string &FS)
+ : PIC16TargetMachine(M, FS, true) {}
+
+
+const TargetAsmInfo *PIC16TargetMachine::createTargetAsmInfo() const {
+ return new PIC16TargetAsmInfo(*this);
+}
+
+bool PIC16TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createPIC16ISelDag(*this));
+ return false;
+}
+
+bool PIC16TargetMachine::
+addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out) {
+ // Output assembly language.
+ PM.add(createPIC16CodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
+
+bool PIC16TargetMachine::addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createPIC16MemSelOptimizerPass());
+ return true; // -print-machineinstr should print after this.
+}
+
+
diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h
new file mode 100644
index 000000000000..7f62d5c13d64
--- /dev/null
+++ b/lib/Target/PIC16/PIC16TargetMachine.h
@@ -0,0 +1,76 @@
+//===-- PIC16TargetMachine.h - Define TargetMachine for PIC16 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PIC16 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef PIC16_TARGETMACHINE_H
+#define PIC16_TARGETMACHINE_H
+
+#include "PIC16InstrInfo.h"
+#include "PIC16ISelLowering.h"
+#include "PIC16RegisterInfo.h"
+#include "PIC16Subtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// PIC16TargetMachine
+///
+class PIC16TargetMachine : public LLVMTargetMachine {
+ PIC16Subtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ PIC16InstrInfo InstrInfo;
+ PIC16TargetLowering TLInfo;
+
+ // PIC16 does not have any call stack frame, therefore it does not have
+ // any PIC16-specific FrameInfo class.
+ TargetFrameInfo FrameInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ PIC16TargetMachine(const Module &M, const std::string &FS,
+ bool Cooper = false);
+
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const PIC16InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetData *getTargetData() const { return &DataLayout;}
+ virtual const PIC16Subtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ virtual const PIC16RegisterInfo *getRegisterInfo() const {
+ return &(InstrInfo.getRegisterInfo());
+ }
+
+ virtual PIC16TargetLowering *getTargetLowering() const {
+ return const_cast<PIC16TargetLowering*>(&TLInfo);
+ }
+
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+ virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+}; // PIC16TargetMachine.
+
+/// CooperTargetMachine
+class CooperTargetMachine : public PIC16TargetMachine {
+public:
+ CooperTargetMachine(const Module &M, const std::string &FS);
+}; // CooperTargetMachine.
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..1ed483ab21e9
--- /dev/null
+++ b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,9 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_partially_linked_object(LLVMPowerPCAsmPrinter
+ PPCAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMPowerPCCodeGen n)
+
+add_dependencies(LLVMPowerPCAsmPrinter ${n})
diff --git a/lib/Target/PowerPC/AsmPrinter/Makefile b/lib/Target/PowerPC/AsmPrinter/Makefile
new file mode 100644
index 000000000000..269ef9204433
--- /dev/null
+++ b/lib/Target/PowerPC/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/PowerPC/AsmPrinter/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCAsmPrinter
+
+# Hack: we need to include the 'main' PowerPC target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
new file mode 100644
index 000000000000..7723982f69ad
--- /dev/null
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -0,0 +1,1204 @@
+//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PowerPC assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+// Documentation at http://developer.apple.com/documentation/DeveloperTools/
+// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "PPC.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class VISIBILITY_HIDDEN PPCAsmPrinter : public AsmPrinter {
+ protected:
+ StringSet<> FnStubs, GVStubs, HiddenGVStubs;
+ const PPCSubtarget &Subtarget;
+ public:
+ explicit PPCAsmPrinter(raw_ostream &O, TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V),
+ Subtarget(TM.getSubtarget<PPCSubtarget>()) {}
+
+ virtual const char *getPassName() const {
+ return "PowerPC Assembly Printer";
+ }
+
+ PPCTargetMachine &getTM() {
+ return static_cast<PPCTargetMachine&>(TM);
+ }
+
+ unsigned enumRegToMachineReg(unsigned enumReg) {
+ switch (enumReg) {
+ default: assert(0 && "Unhandled register!"); break;
+ case PPC::CR0: return 0;
+ case PPC::CR1: return 1;
+ case PPC::CR2: return 2;
+ case PPC::CR3: return 3;
+ case PPC::CR4: return 4;
+ case PPC::CR5: return 5;
+ case PPC::CR6: return 6;
+ case PPC::CR7: return 7;
+ }
+ abort();
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO);
+
+ /// stripRegisterPrefix - This method strips the character prefix from a
+    /// register name so that only the number is left. Used for Linux asm.
+ const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v': return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+ }
+
+ /// printRegister - Print register according to target requirements.
+ ///
+ void printRegister(const MachineOperand &MO, bool R0AsZero) {
+ unsigned RegNo = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??");
+
+ // If we should use 0 for R0.
+ if (R0AsZero && RegNo == PPC::R0) {
+ O << "0";
+ return;
+ }
+
+ const char *RegName = TM.getRegisterInfo()->get(RegNo).AsmName;
+      // The Linux assembler (and possibly others) does not accept register mnemonics.
+ // FIXME - What about special registers used in mfspr/mtspr?
+ if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ O << RegName;
+ }
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isReg()) {
+ printRegister(MO, false);
+ } else if (MO.isImm()) {
+ O << MO.getImm();
+ } else {
+ printOp(MO);
+ }
+ }
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+
+ void printS5ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ char value = MI->getOperand(OpNo).getImm();
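+      // Sign-extend the low 5 bits of the immediate into a full integer.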
+ value = (value << (32-5)) >> (32-5);
+ O << (int)value;
+ }
+ void printU5ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ unsigned char value = MI->getOperand(OpNo).getImm();
+ assert(value <= 31 && "Invalid u5imm argument!");
+ O << (unsigned int)value;
+ }
+ void printU6ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ unsigned char value = MI->getOperand(OpNo).getImm();
+ assert(value <= 63 && "Invalid u6imm argument!");
+ O << (unsigned int)value;
+ }
+ void printS16ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ O << (short)MI->getOperand(OpNo).getImm();
+ }
+ void printU16ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+ }
+ void printS16X4ImmOperand(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ O << (short)(MI->getOperand(OpNo).getImm()*4);
+ } else {
+ O << "lo16(";
+ printOp(MI->getOperand(OpNo));
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "-\"L" << getFunctionNumber() << "$pb\")";
+ else
+ O << ')';
+ }
+ }
+ void printBranchOperand(const MachineInstr *MI, unsigned OpNo) {
+ // Branches can take an immediate operand. This is used by the branch
+      // selection pass to print $+8, an eight-byte displacement from the PC.
+ if (MI->getOperand(OpNo).isImm()) {
+ O << "$+" << MI->getOperand(OpNo).getImm()*4;
+ } else {
+ printOp(MI->getOperand(OpNo));
+ }
+ }
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (TM.getRelocationModel() != Reloc::Static) {
+ if (MO.getType() == MachineOperand::MO_GlobalAddress) {
+ GlobalValue *GV = MO.getGlobal();
+ if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
+ // Dynamically-resolved functions need a stub for the function.
+ std::string Name = Mang->getValueName(GV);
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ return;
+ }
+ }
+ if (MO.getType() == MachineOperand::MO_ExternalSymbol) {
+ std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName();
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ return;
+ }
+ }
+
+ printOp(MI->getOperand(OpNo));
+ }
+ void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo) {
+ O << (int)MI->getOperand(OpNo).getImm()*4;
+ }
+ void printPICLabel(const MachineInstr *MI, unsigned OpNo) {
+ O << "\"L" << getFunctionNumber() << "$pb\"\n";
+ O << "\"L" << getFunctionNumber() << "$pb\":";
+ }
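+    // printSymbolHi/printSymbolLo - Print the high/low 16 bits of a symbol
+    // address, using Darwin's ha16()/lo16() wrappers or the ELF @ha/@l
+    // suffixes, subtracting the PIC base label under PIC.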
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo);
+ } else {
+ if (Subtarget.isDarwin()) O << "ha16(";
+ printOp(MI->getOperand(OpNo));
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "-\"L" << getFunctionNumber() << "$pb\"";
+ if (Subtarget.isDarwin())
+ O << ')';
+ else
+ O << "@ha";
+ }
+ }
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo);
+ } else {
+ if (Subtarget.isDarwin()) O << "lo16(";
+ printOp(MI->getOperand(OpNo));
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "-\"L" << getFunctionNumber() << "$pb\"";
+ if (Subtarget.isDarwin())
+ O << ')';
+ else
+ O << "@l";
+ }
+ }
+ void printcrbitm(const MachineInstr *MI, unsigned OpNo) {
+ unsigned CCReg = MI->getOperand(OpNo).getReg();
+ unsigned RegNo = enumRegToMachineReg(CCReg);
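+      // Emit an 8-bit CR field mask with only this field's bit set; CR0 is
+      // the most-significant bit (0x80), CR7 the least-significant.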
+ O << (0x80 >> RegNo);
+ }
+ // The new addressing mode printers.
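+    // These print a D-form memory operand as "offset(base)"; the base is
+    // printed as a literal 0 when it is R0, which reads as constant zero in
+    // that position.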
+ void printMemRegImm(const MachineInstr *MI, unsigned OpNo) {
+ printSymbolLo(MI, OpNo);
+ O << '(';
+ if (MI->getOperand(OpNo+1).isReg() &&
+ MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1);
+ O << ')';
+ }
+ void printMemRegImmShifted(const MachineInstr *MI, unsigned OpNo) {
+ if (MI->getOperand(OpNo).isImm())
+ printS16X4ImmOperand(MI, OpNo);
+ else
+ printSymbolLo(MI, OpNo);
+ O << '(';
+ if (MI->getOperand(OpNo+1).isReg() &&
+ MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1);
+ O << ')';
+ }
+
+ void printMemRegReg(const MachineInstr *MI, unsigned OpNo) {
+ // When used as the base register, r0 reads constant zero rather than
+      // the value contained in the register. For this reason, the Darwin
+ // assembler requires that we print r0 as 0 (no r) when used as the base.
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ printRegister(MO, true);
+ O << ", ";
+ printOperand(MI, OpNo+1);
+ }
+
+ void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier);
+
+ virtual bool runOnMachineFunction(MachineFunction &F) = 0;
+ virtual bool doFinalization(Module &M) = 0;
+
+ virtual void EmitExternalGlobal(const GlobalVariable *GV);
+ };
+
+ /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+ class VISIBILITY_HIDDEN PPCLinuxAsmPrinter : public PPCAsmPrinter {
+ DwarfWriter *DW;
+ MachineModuleInfo *MMI;
+ public:
+ explicit PPCLinuxAsmPrinter(raw_ostream &O, PPCTargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : PPCAsmPrinter(O, TM, T, OL, V), DW(0), MMI(0) {}
+
+ virtual const char *getPassName() const {
+ return "Linux PPC Assembly Printer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ PPCAsmPrinter::getAnalysisUsage(AU);
+ }
+
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ };
+
+ /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+ /// OS X
+ class VISIBILITY_HIDDEN PPCDarwinAsmPrinter : public PPCAsmPrinter {
+ DwarfWriter *DW;
+ MachineModuleInfo *MMI;
+ raw_ostream &OS;
+ public:
+ explicit PPCDarwinAsmPrinter(raw_ostream &O, PPCTargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : PPCAsmPrinter(O, TM, T, OL, V), DW(0), MMI(0), OS(O) {}
+
+ virtual const char *getPassName() const {
+ return "Darwin PPC Assembly Printer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ PPCAsmPrinter::getAnalysisUsage(AU);
+ }
+
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ };
+} // end of anonymous namespace
+
+// Include the auto-generated portion of the assembly writer
+#include "PPCGenAsmWriter.inc"
+
+void PPCAsmPrinter::printOp(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ cerr << "printOp() does not handle immediate values\n";
+ abort();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ // FIXME: PIC relocation model
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() != Reloc::Static) {
+ std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName();
+ GVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ return;
+ }
+ O << TAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ // Computing the address of a global symbol, not calling it.
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+
+ // External or weakly linked global variables need non-lazily-resolved stubs
+ if (TM.getRelocationModel() != Reloc::Static) {
+ if (GV->isDeclaration() || GV->isWeakForLinker()) {
+ if (GV->hasHiddenVisibility()) {
+ if (!GV->isDeclaration() && !GV->hasCommonLinkage())
+ O << Name;
+ else {
+ HiddenGVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ }
+ } else {
+ GVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ }
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ return;
+ }
+ }
+ O << Name;
+
+ printOffset(MO.getOffset());
+
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ return;
+ }
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// EmitExternalGlobal - In this case we need to use the indirect symbol.
+///
+void PPCAsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) {
+ std::string Name;
+ getGlobalLinkName(GV, Name);
+ if (TM.getRelocationModel() != Reloc::Static) {
+ if (GV->hasHiddenVisibility())
+ HiddenGVStubs.insert(Name);
+ else
+ GVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ return;
+ }
+ O << Name;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ // PPC never has a prefix.
+ printOperand(MI, OpNo);
+ return false;
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ O << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ if (MI->getOperand(OpNo).isReg())
+ printMemRegReg(MI, OpNo);
+ else
+ printMemRegImm(MI, OpNo);
+ return false;
+}
+
+void PPCAsmPrinter::printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier) {
+ assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
+ unsigned Code = MI->getOperand(OpNo).getImm();
+ if (!strcmp(Modifier, "cc")) {
+ switch ((PPC::Predicate)Code) {
+ case PPC::PRED_ALWAYS: return; // Don't print anything for always.
+ case PPC::PRED_LT: O << "lt"; return;
+ case PPC::PRED_LE: O << "le"; return;
+ case PPC::PRED_EQ: O << "eq"; return;
+ case PPC::PRED_GE: O << "ge"; return;
+ case PPC::PRED_GT: O << "gt"; return;
+ case PPC::PRED_NE: O << "ne"; return;
+ case PPC::PRED_UN: O << "un"; return;
+ case PPC::PRED_NU: O << "nu"; return;
+ }
+
+ } else {
+ assert(!strcmp(Modifier, "reg") &&
+ "Need to specify 'cc' or 'reg' as predicate op modifier!");
+ // Don't print the register for 'always'.
+ if (Code == PPC::PRED_ALWAYS) return;
+ printOperand(MI, OpNo+1);
+ }
+}
+
+
+/// printMachineInstruction -- Print out a single PowerPC MI in Darwin syntax to
+/// the current output stream.
+///
+void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Check for slwi/srwi mnemonics.
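+  // rlwinm rA, rS, n, 0, 31-n  is  slwi rA, rS, n;
+  // rlwinm rA, rS, 32-n, n, 31  is  srwi rA, rS, n.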
+ if (MI->getOpcode() == PPC::RLWINM) {
+ bool FoundMnemonic = false;
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char MB = MI->getOperand(3).getImm();
+ unsigned char ME = MI->getOperand(4).getImm();
+ if (SH <= 31 && MB == 0 && ME == (31-SH)) {
+ O << "\tslwi "; FoundMnemonic = true;
+ }
+ if (SH <= 31 && MB == (32-SH) && ME == 31) {
+ O << "\tsrwi "; FoundMnemonic = true;
+ SH = 32-SH;
+ }
+ if (FoundMnemonic) {
+ printOperand(MI, 0);
+ O << ", ";
+ printOperand(MI, 1);
+ O << ", " << (unsigned int)SH << '\n';
+ return;
+ }
+ } else if (MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) {
+ if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ O << "\tmr ";
+ printOperand(MI, 0);
+ O << ", ";
+ printOperand(MI, 1);
+ O << '\n';
+ return;
+ }
+ } else if (MI->getOpcode() == PPC::RLDICR) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char ME = MI->getOperand(3).getImm();
+ // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
+ if (63-SH == ME) {
+ O << "\tsldi ";
+ printOperand(MI, 0);
+ O << ", ";
+ printOperand(MI, 1);
+ O << ", " << (unsigned int)SH << '\n';
+ return;
+ }
+ }
+
+ if (printInstruction(MI))
+ return; // Printer was automatically generated
+
+ assert(0 && "Unhandled instruction in asm writer!");
+ abort();
+ return;
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::PrivateLinkage:
+ case Function::InternalLinkage: // Symbols default to internal.
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.global\t" << CurrentFnName << '\n'
+ << "\t.type\t" << CurrentFnName << ", @function\n";
+ break;
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ O << "\t.global\t" << CurrentFnName << '\n';
+ O << "\t.weak\t" << CurrentFnName << '\n';
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ EmitAlignment(2, F);
+ O << CurrentFnName << ":\n";
+
+ // Emit pre-function debug information.
+ DW->BeginFunction(&MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ O << "\t.size\t" << CurrentFnName << ",.-" << CurrentFnName << '\n';
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ // Emit post-function debug information.
+ DW->EndFunction(&MF);
+
+ O.flush();
+
+ // We didn't modify anything.
+ return false;
+}
+
+bool PPCLinuxAsmPrinter::doInitialization(Module &M) {
+ bool Result = AsmPrinter::doInitialization(M);
+
+ // Emit initial debug information.
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMI);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ assert(DW && "DwarfWriter is not available");
+ DW->BeginModule(&M, MMI, O, this, TAI);
+
+ // GNU as handles section names wrapped in quotes
+ Mang->setUseQuotes(true);
+
+ SwitchToSection(TAI->getTextSection());
+
+ return Result;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \\n or \\0.
+static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+ for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+ Name != E; ++Name)
+ if (isprint(*Name))
+ OS << *Name;
+}
+
+void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+    return; // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ std::string name = Mang->getValueName(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ Constant *C = GVar->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && /* FIXME: Verify correct */
+ !GVar->hasSection() &&
+ (GVar->hasLocalLinkage() || GVar->hasExternalLinkage() ||
+ GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasExternalLinkage()) {
+ O << "\t.global " << name << '\n';
+ O << "\t.type " << name << ", @object\n";
+ O << name << ":\n";
+ O << "\t.zero " << Size << '\n';
+ } else if (GVar->hasLocalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << ',' << Size;
+ } else {
+ O << ".comm " << name << ',' << Size;
+ }
+ if (VerboseAsm) {
+ O << "\t\t" << TAI->getCommentString() << " '";
+ PrintUnmangledNameSafely(GVar, O);
+ O << "'";
+ }
+ O << '\n';
+ return;
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::CommonLinkage:
+ O << "\t.global " << name << '\n'
+ << "\t.type " << name << ", @object\n"
+ << "\t.weak " << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+    // FIXME: appending linkage variables should go into a section named
+    // after them or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.global " << name << '\n'
+ << "\t.type " << name << ", @object\n";
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ default:
+ cerr << "Unknown linkage type!";
+ abort();
+ }
+
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O << "\t\t\t\t" << TAI->getCommentString() << " '";
+ PrintUnmangledNameSafely(GVar, O);
+ O << "'";
+ }
+ O << '\n';
+
+  // If the initializer is an extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+}
+
+bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+ // TODO
+
+ // Emit initial debug information.
+ DW->EndModule();
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::PrivateLinkage:
+ case Function::InternalLinkage: // Symbols default to internal.
+ break;
+ case Function::ExternalLinkage:
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ break;
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ O << "\t.weak_definition\t" << CurrentFnName << '\n';
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ EmitAlignment(F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4, F);
+ O << CurrentFnName << ":\n";
+
+ // Emit pre-function debug information.
+ DW->BeginFunction(&MF);
+
+ // If the function is empty, then we need to emit *something*. Otherwise, the
+ // function's label might be associated with something that it wasn't meant to
+ // be associated with. We emit a noop in this situation.
+ MachineFunction::iterator I = MF.begin();
+
+ if (++I == MF.end() && MF.front().empty())
+ O << "\tnop\n";
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true, VerboseAsm);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // Emit post-function debug information.
+ DW->EndFunction(&MF);
+
+ // We didn't modify anything.
+ return false;
+}
+
+
+bool PPCDarwinAsmPrinter::doInitialization(Module &M) {
+ static const char *const CPUDirectives[] = {
+ "",
+ "ppc",
+ "ppc601",
+ "ppc602",
+ "ppc603",
+ "ppc7400",
+ "ppc750",
+ "ppc970",
+ "ppc64"
+ };
+
+ unsigned Directive = Subtarget.getDarwinDirective();
+ if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_970;
+ if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
+ Directive = PPC::DIR_7400;
+ if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_64;
+ assert(Directive <= PPC::DIR_64 && "Directive out of range.");
+ O << "\t.machine " << CPUDirectives[Directive] << '\n';
+
+ bool Result = AsmPrinter::doInitialization(M);
+
+ // Emit initial debug information.
+ // We need this for Personality functions.
+ // AsmPrinter::doInitialization should have done this analysis.
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMI);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ assert(DW && "DwarfWriter is not available");
+ DW->BeginModule(&M, MMI, O, this, TAI);
+
+ // Darwin wants symbols to be quoted if they have complex names.
+ Mang->setUseQuotes(true);
+
+  // Prime text sections so they are adjacent. This reduces the likelihood that
+  // a large data or debug section causes a branch to exceed the 16M limit.
+ SwitchToTextSection("\t.section __TEXT,__textcoal_nt,coalesced,"
+ "pure_instructions");
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ SwitchToTextSection("\t.section __TEXT,__picsymbolstub1,symbol_stubs,"
+ "pure_instructions,32");
+ } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
+ SwitchToTextSection("\t.section __TEXT,__symbol_stub1,symbol_stubs,"
+ "pure_instructions,16");
+ }
+ SwitchToSection(TAI->getTextSection());
+
+ return Result;
+}
+
+void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+    return; // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar)) {
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (GVar->getName() == "llvm.global_ctors")
+ O << ".reference .constructors_used\n";
+ else if (GVar->getName() == "llvm.global_dtors")
+ O << ".reference .destructors_used\n";
+ }
+ return;
+ }
+
+ std::string name = Mang->getValueName(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ Constant *C = GVar->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && /* FIXME: Verify correct */
+ !GVar->hasSection() &&
+ (GVar->hasLocalLinkage() || GVar->hasExternalLinkage() ||
+ GVar->isWeakForLinker()) &&
+ TAI->SectionKindForGlobal(GVar) != SectionKind::RODataMergeStr) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasExternalLinkage()) {
+ O << "\t.globl " << name << '\n';
+ O << "\t.zerofill __DATA, __common, " << name << ", "
+ << Size << ", " << Align;
+ } else if (GVar->hasLocalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << ',' << Size << ',' << Align;
+ } else if (!GVar->hasCommonLinkage()) {
+ O << "\t.globl " << name << '\n'
+ << TAI->getWeakDefDirective() << name << '\n';
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O << "\t\t\t\t" << TAI->getCommentString() << " ";
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << '\n';
+ EmitGlobalConstant(C);
+ return;
+ } else {
+ O << ".comm " << name << ',' << Size;
+ // Darwin 9 and above support aligned common data.
+ if (Subtarget.isDarwin9())
+ O << ',' << Align;
+ }
+ if (VerboseAsm) {
+ O << "\t\t" << TAI->getCommentString() << " '";
+ PrintUnmangledNameSafely(GVar, O);
+ O << "'";
+ }
+ O << '\n';
+ return;
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::CommonLinkage:
+ O << "\t.globl " << name << '\n'
+ << "\t.weak_definition " << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+    // FIXME: appending linkage variables should go into a section named
+    // after them or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.globl " << name << '\n';
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ default:
+ cerr << "Unknown linkage type!";
+ abort();
+ }
+
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O << "\t\t\t\t" << TAI->getCommentString() << " '";
+ PrintUnmangledNameSafely(GVar, O);
+ O << "'";
+ }
+ O << '\n';
+
+  // If the initializer is an extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+}
+
+bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+ bool isPPC64 = TD->getPointerSizeInBits() == 64;
+
+ // Output stubs for dynamically-linked functions
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i) {
+ SwitchToTextSection("\t.section __TEXT,__picsymbolstub1,symbol_stubs,"
+ "pure_instructions,32");
+ EmitAlignment(4);
+ const char *p = i->getKeyData();
+ bool hasQuote = p[0]=='\"';
+ printSuffixedName(p, "$stub");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << '\n';
+ O << "\tmflr r0\n";
+ O << "\tbcl 20,31,";
+ if (hasQuote)
+ O << "\"L0$" << &p[1];
+ else
+ O << "L0$" << p;
+ O << '\n';
+ if (hasQuote)
+ O << "\"L0$" << &p[1];
+ else
+ O << "L0$" << p;
+ O << ":\n";
+ O << "\tmflr r11\n";
+ O << "\taddis r11,r11,ha16(";
+ printSuffixedName(p, "$lazy_ptr");
+ O << "-";
+ if (hasQuote)
+ O << "\"L0$" << &p[1];
+ else
+ O << "L0$" << p;
+ O << ")\n";
+ O << "\tmtlr r0\n";
+ if (isPPC64)
+ O << "\tldu r12,lo16(";
+ else
+ O << "\tlwzu r12,lo16(";
+ printSuffixedName(p, "$lazy_ptr");
+ O << "-";
+ if (hasQuote)
+ O << "\"L0$" << &p[1];
+ else
+ O << "L0$" << p;
+ O << ")(r11)\n";
+ O << "\tmtctr r12\n";
+ O << "\tbctr\n";
+ SwitchToDataSection(".lazy_symbol_pointer");
+ printSuffixedName(p, "$lazy_ptr");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << '\n';
+ if (isPPC64)
+ O << "\t.quad dyld_stub_binding_helper\n";
+ else
+ O << "\t.long dyld_stub_binding_helper\n";
+ }
+ } else {
+ for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i) {
+ SwitchToTextSection("\t.section __TEXT,__symbol_stub1,symbol_stubs,"
+ "pure_instructions,16");
+ EmitAlignment(4);
+ const char *p = i->getKeyData();
+ printSuffixedName(p, "$stub");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << '\n';
+ O << "\tlis r11,ha16(";
+ printSuffixedName(p, "$lazy_ptr");
+ O << ")\n";
+ if (isPPC64)
+ O << "\tldu r12,lo16(";
+ else
+ O << "\tlwzu r12,lo16(";
+ printSuffixedName(p, "$lazy_ptr");
+ O << ")(r11)\n";
+ O << "\tmtctr r12\n";
+ O << "\tbctr\n";
+ SwitchToDataSection(".lazy_symbol_pointer");
+ printSuffixedName(p, "$lazy_ptr");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << '\n';
+ if (isPPC64)
+ O << "\t.quad dyld_stub_binding_helper\n";
+ else
+ O << "\t.long dyld_stub_binding_helper\n";
+ }
+ }
+
+ O << '\n';
+
+ if (TAI->doesSupportExceptionHandling() && MMI) {
+ // Add the (possibly multiple) personalities to the set of global values.
+ // Only referenced functions get into the Personalities list.
+ const std::vector<Function *>& Personalities = MMI->getPersonalities();
+
+ for (std::vector<Function *>::const_iterator I = Personalities.begin(),
+ E = Personalities.end(); I != E; ++I)
+ if (*I) GVStubs.insert("_" + (*I)->getName());
+ }
+
+ // Output stubs for external and common global variables.
+ if (!GVStubs.empty()) {
+ SwitchToDataSection(".non_lazy_symbol_pointer");
+ for (StringSet<>::iterator i = GVStubs.begin(), e = GVStubs.end();
+ i != e; ++i) {
+ std::string p = i->getKeyData();
+ printSuffixedName(p, "$non_lazy_ptr");
+ O << ":\n";
+ O << "\t.indirect_symbol " << p << '\n';
+ if (isPPC64)
+ O << "\t.quad\t0\n";
+ else
+ O << "\t.long\t0\n";
+ }
+ }
+
+ if (!HiddenGVStubs.empty()) {
+ SwitchToSection(TAI->getDataSection());
+ for (StringSet<>::iterator i = HiddenGVStubs.begin(), e = HiddenGVStubs.end();
+ i != e; ++i) {
+ std::string p = i->getKeyData();
+ EmitAlignment(isPPC64 ? 3 : 2);
+ printSuffixedName(p, "$non_lazy_ptr");
+ O << ":\n";
+ if (isPPC64)
+ O << "\t.quad\t";
+ else
+ O << "\t.long\t";
+ O << p << '\n';
+ }
+ }
+
+
+ // Emit initial debug information.
+ DW->EndModule();
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never generates
+ // code that does this, it is always safe to set.
+ O << "\t.subsections_via_symbols\n";
+
+ return AsmPrinter::doFinalization(M);
+}
+
+
+
+/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
+/// for a MachineFunction to the given output stream, in a format that the
+/// Darwin assembler can deal with.
+///
+FunctionPass *llvm::createPPCAsmPrinterPass(raw_ostream &o,
+ PPCTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
+
+ if (Subtarget->isDarwin()) {
+ return new PPCDarwinAsmPrinter(o, tm, tm.getTargetAsmInfo(),
+ OptLevel, verbose);
+ } else {
+ return new PPCLinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(),
+ OptLevel, verbose);
+ }
+}
+
+namespace {
+ static struct Register {
+ Register() {
+ PPCTargetMachine::registerAsmPrinter(createPPCAsmPrinterPass);
+ }
+ } Registrator;
+}
+
+extern "C" int PowerPCAsmPrinterForceLink;
+int PowerPCAsmPrinterForceLink = 0;
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
new file mode 100644
index 000000000000..0b67aff2156f
--- /dev/null
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -0,0 +1,28 @@
+set(LLVM_TARGET_DEFINITIONS PPC.td)
+
+tablegen(PPCGenInstrNames.inc -gen-instr-enums)
+tablegen(PPCGenRegisterNames.inc -gen-register-enums)
+tablegen(PPCGenAsmWriter.inc -gen-asm-writer)
+tablegen(PPCGenCodeEmitter.inc -gen-emitter)
+tablegen(PPCGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(PPCGenRegisterInfo.inc -gen-register-desc)
+tablegen(PPCGenInstrInfo.inc -gen-instr-desc)
+tablegen(PPCGenDAGISel.inc -gen-dag-isel)
+tablegen(PPCGenCallingConv.inc -gen-callingconv)
+tablegen(PPCGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(PowerPCCodeGen
+ PPCBranchSelector.cpp
+ PPCCodeEmitter.cpp
+ PPCHazardRecognizers.cpp
+ PPCInstrInfo.cpp
+ PPCISelDAGToDAG.cpp
+ PPCISelLowering.cpp
+ PPCJITInfo.cpp
+ PPCMachOWriterInfo.cpp
+ PPCPredicates.cpp
+ PPCRegisterInfo.cpp
+ PPCSubtarget.cpp
+ PPCTargetAsmInfo.cpp
+ PPCTargetMachine.cpp
+ )
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
new file mode 100644
index 000000000000..db688970e9e7
--- /dev/null
+++ b/lib/Target/PowerPC/Makefile
@@ -0,0 +1,22 @@
+##===- lib/Target/PowerPC/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMPowerPCCodeGen
+TARGET = PPC
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \
+ PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \
+ PPCGenRegisterInfo.h.inc PPCGenRegisterInfo.inc \
+ PPCGenInstrInfo.inc PPCGenDAGISel.inc \
+ PPCGenSubtarget.inc PPCGenCallingConv.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
new file mode 100644
index 000000000000..c844e21990b3
--- /dev/null
+++ b/lib/Target/PowerPC/PPC.h
@@ -0,0 +1,49 @@
+//===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PowerPC back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_H
+#define LLVM_TARGET_POWERPC_H
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class PPCTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class raw_ostream;
+
+FunctionPass *createPPCBranchSelectionPass();
+FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+FunctionPass *createPPCAsmPrinterPass(raw_ostream &OS,
+ PPCTargetMachine &TM,
+ CodeGenOpt::Level OptLevel, bool Verbose);
+FunctionPass *createPPCCodeEmitterPass(PPCTargetMachine &TM,
+ MachineCodeEmitter &MCE);
+FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &MCE);
+} // end namespace llvm
+
+// Defines symbolic names for PowerPC registers. This defines a mapping from
+// register name to register number.
+//
+#include "PPCGenRegisterNames.inc"
+
+// Defines symbolic names for the PowerPC instructions.
+//
+#include "PPCGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
new file mode 100644
index 000000000000..08f5bb43087f
--- /dev/null
+++ b/lib/Target/PowerPC/PPC.td
@@ -0,0 +1,114 @@
+//===- PPC.td - Describe the PowerPC Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC Subtarget features.
+//
+
+//===----------------------------------------------------------------------===//
+// CPU Directives //
+//===----------------------------------------------------------------------===//
+
+def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
+def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
+def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
+def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
+def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
+def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
+def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
+
+def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
+ "Enable 64-bit instructions">;
+def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
+ "Enable 64-bit registers usage for ppc32 [beta]">;
+def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
+ "Enable Altivec instructions">;
+def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true",
+ "Enable GPUL instructions">;
+def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
+ "Enable the fsqrt instruction">;
+def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
+ "Enable the stfiwx instruction">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PPCRegisterInfo.td"
+include "PPCSchedule.td"
+include "PPCInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC processors supported.
+//
+
+def : Processor<"generic", G3Itineraries, [Directive32]>;
+def : Processor<"601", G3Itineraries, [Directive601]>;
+def : Processor<"602", G3Itineraries, [Directive602]>;
+def : Processor<"603", G3Itineraries, [Directive603]>;
+def : Processor<"603e", G3Itineraries, [Directive603]>;
+def : Processor<"603ev", G3Itineraries, [Directive603]>;
+def : Processor<"604", G3Itineraries, [Directive604]>;
+def : Processor<"604e", G3Itineraries, [Directive604]>;
+def : Processor<"620", G3Itineraries, [Directive620]>;
+def : Processor<"g3", G3Itineraries, [Directive7400]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"750", G4Itineraries, [Directive750, FeatureAltivec]>;
+def : Processor<"970", G5Itineraries,
+ [Directive970, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"g5", G5Itineraries,
+ [Directive970, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"ppc", G3Itineraries, [Directive32]>;
+def : Processor<"ppc64", G5Itineraries,
+ [Directive64, FeatureAltivec,
+ FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "PPCCallingConv.td"
+
+def PPCInstrInfo : InstrInfo {
+  // Define how we want to lay out our target-specific information field. This
+ // should be kept up-to-date with the fields in the PPCInstrInfo.h file.
+ let TSFlagsFields = ["PPC970_First",
+ "PPC970_Single",
+ "PPC970_Cracked",
+ "PPC970_Unit"];
+ let TSFlagsShifts = [0, 1, 2, 3];
+
+ let isLittleEndianEncoding = 1;
+}
+
+
+def PPC : Target {
+ // Information about the instructions.
+ let InstructionSet = PPCInstrInfo;
+}
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
new file mode 100644
index 000000000000..b95a502d9187
--- /dev/null
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -0,0 +1,174 @@
+//===-- PPCBranchSelector.cpp - Emit long conditional branches-----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that scans a machine function to determine which
+// conditional branches need more than 16 bits of displacement to reach their
+// target basic block. It does this in two passes: one that computes basic
+// block positions, and one that converts branch pseudo ops into machine branch
+// opcodes. This pass should be run last, just before the assembly printer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-branch-select"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "PPCPredicates.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+STATISTIC(NumExpanded, "Number of branches expanded to long format");
+
+namespace {
+ struct VISIBILITY_HIDDEN PPCBSel : public MachineFunctionPass {
+ static char ID;
+ PPCBSel() : MachineFunctionPass(&ID) {}
+
+ /// BlockSizes - The sizes of the basic blocks in the function.
+ std::vector<unsigned> BlockSizes;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "PowerPC Branch Selector";
+ }
+ };
+ char PPCBSel::ID = 0;
+}
+
+/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createPPCBranchSelectionPass() {
+ return new PPCBSel();
+}
+
+bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = 0;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ unsigned BlockSize = 0;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI)
+ BlockSize += TII->GetInstSizeInBytes(MBBI);
+
+ BlockSizes[MBB->getNumber()] = BlockSize;
+ FuncSize += BlockSize;
+ }
+
+  // If the entire function is smaller than the displacement of a branch field,
+  // we know we don't need to expand any branches in this function. This is a
+ // common case.
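+  // (The pass treats the conditional-branch displacement as a signed 16-bit
+  // byte offset, so a function under 32K needs no expansion.)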
+ if (FuncSize < (1 << 15)) {
+ BlockSizes.clear();
+ return false;
+ }
+
+ // For each conditional branch, if the offset to its destination is larger
+ // than the offset field allows, transform it into a long branch sequence
+ // like this:
+ // short branch:
+ // bCC MBB
+ // long branch:
+ // b!CC $PC+8
+ // b MBB
+ //
+ bool MadeChange = true;
+ bool EverMadeChange = false;
+ while (MadeChange) {
+ // Iteratively expand branches until we reach a fixed point.
+ MadeChange = false;
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ unsigned MBBStartOffset = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) {
+ MBBStartOffset += TII->GetInstSizeInBytes(I);
+ continue;
+ }
+
+ // Determine the offset from the current branch to the destination
+ // block.
+ MachineBasicBlock *Dest = I->getOperand(2).getMBB();
+
+ int BranchSize;
+ if (Dest->getNumber() <= MBB.getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
+ BranchSize = MBBStartOffset;
+
+ for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ BranchSize = -MBBStartOffset;
+
+ for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ }
+
+ // If this branch is in range, ignore it.
+ if (isInt16(BranchSize)) {
+ MBBStartOffset += 4;
+ continue;
+ }
+
+ // Otherwise, we have to expand it to a long branch.
+ // The BCC operands are:
+ // 0. PPC branch predicate
+ // 1. CR register
+ // 2. Target MBB
+ PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
+ unsigned CRReg = I->getOperand(1).getReg();
+
+ MachineInstr *OldBranch = I;
+ DebugLoc dl = OldBranch->getDebugLoc();
+
+ // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
+ BuildMI(MBB, I, dl, TII->get(PPC::BCC))
+ .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+
+ // Uncond branch to the real destination.
+ I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
+
+ // Remove the old branch from the function.
+ OldBranch->eraseFromParent();
+
+        // This instruction is now 8 bytes; increase the size of the block by 4
+        // and remember to iterate.
+ BlockSizes[MBB.getNumber()] += 4;
+ MBBStartOffset += 8;
+ ++NumExpanded;
+ MadeChange = true;
+ }
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ BlockSizes.clear();
+ return true;
+}
+
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
new file mode 100644
index 000000000000..9f916f38d5e4
--- /dev/null
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -0,0 +1,66 @@
+//===- PPCCallingConv.td - Calling Conventions for PowerPC ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PowerPC 32- and 64-bit
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for PowerPC
+def RetCC_PPC : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
+
+ CCIfType<[f32], CCAssignToReg<[F1]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2]>>,
+
+ // Vector types are always returned in V2.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+/*
+def CC_PPC : CallingConv<[
+ // The first 8 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
+
+  // Common sub-targets pass FP values in F1 - F13
+ CCIfType<[f32, f64], CCIfSubtarget<"isMachoABI()",
+ CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>>,
+  // The ELF32 sub-target passes FP values in F1 - F8.
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+
+ // The first 12 Vector arguments are passed in altivec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>>
+
+/*
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>*/
+]>;
+
+*/
+
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
new file mode 100644
index 000000000000..aa3dce19e505
--- /dev/null
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -0,0 +1,266 @@
+//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC32 -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to
+// JIT-compile bitcode to native PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetMachine.h"
+#include "PPCRelocations.h"
+#include "PPC.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+ class PPCCodeEmitter {
+ TargetMachine &TM;
+ MachineCodeEmitter &MCE;
+ public:
+ PPCCodeEmitter(TargetMachine &tm, MachineCodeEmitter &mce):
+ TM(tm), MCE(mce) {}
+
+    /// getBinaryCodeForInstr - This function, generated by the
+    /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+    /// machine instructions.
+    unsigned getBinaryCodeForInstr(const MachineInstr &MI);
+
+    /// getMachineOpValue - evaluates the MachineOperand of a given
+    /// MachineInstr.
+    unsigned getMachineOpValue(const MachineInstr &MI,
+                               const MachineOperand &MO);
+
+    /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record
+    /// its address in the function into this pointer.
+    void *MovePCtoLROffset;
+ };
+
+ template <class CodeEmitter>
+ class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass,
+ public PPCCodeEmitter
+ {
+ TargetMachine &TM;
+ CodeEmitter &MCE;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ public:
+ static char ID;
+ Emitter(TargetMachine &tm, CodeEmitter &mce)
+ : MachineFunctionPass(&ID), PPCCodeEmitter(tm, mce), TM(tm), MCE(mce) {}
+
+ const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
+
+ /// runOnMachineFunction - emits the given MachineFunction to memory
+ ///
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// emitBasicBlock - emits the given MachineBasicBlock to memory
+ ///
+ void emitBasicBlock(MachineBasicBlock &MBB);
+
+ /// getValueBit - return the particular bit of Val
+ ///
+ unsigned getValueBit(int64_t Val, unsigned bit) { return (Val >> bit) & 1; }
+ };
+
+ template <class CodeEmitter>
+ char Emitter<CodeEmitter>::ID = 0;
+}
+
+/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code
+/// to the specified MCE object.
+FunctionPass *llvm::createPPCCodeEmitterPass(PPCTargetMachine &TM,
+ MachineCodeEmitter &MCE) {
+ return new Emitter<MachineCodeEmitter>(TM, MCE);
+}
+
+FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new Emitter<JITCodeEmitter>(TM, JCE);
+}
+
+template <class CodeEmitter>
+bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
+  assert((MF.getTarget().getRelocationModel() == Reloc::Default ||
+          MF.getTarget().getRelocationModel() == Reloc::Static) &&
+         "JIT relocation model must be set to static or default!");
+
+ MCE.setModuleInfo(&getAnalysis<MachineModuleInfo>());
+ do {
+ MovePCtoLROffset = 0;
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ emitBasicBlock(*BB);
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+template <class CodeEmitter>
+void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) {
+ MCE.StartMachineBasicBlock(&MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){
+ const MachineInstr &MI = *I;
+ switch (MI.getOpcode()) {
+ default:
+ MCE.emitWordBE(getBinaryCodeForInstr(MI));
+ break;
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getImm());
+ break;
+ case TargetInstrInfo::IMPLICIT_DEF:
+ break; // pseudo opcode, no side effects
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8:
+ assert(TM.getRelocationModel() == Reloc::PIC_);
+ MovePCtoLROffset = (void*)MCE.getCurrentPCValue();
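+      // 0x48000005 encodes "bl .+4" (AA=0, LK=1): branch to the next
+      // instruction and capture that instruction's address in LR.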
+ MCE.emitWordBE(0x48000005); // bl 1
+ break;
+ }
+ }
+}
+
+unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) {
+
+ unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
+ // or things that get fixed up later by the JIT.
+ if (MO.isReg()) {
+ rv = PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+
+ // Special encoding for MTCRF and MFOCRF, which uses a bit mask for the
+ // register, not the register number directly.
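+    // (For example, CR2, register number 2, is encoded as 0x80 >> 2 = 0x20
+    // in the FXM field.)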
+ if ((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)) {
+ rv = 0x80 >> rv;
+ }
+ } else if (MO.isImm()) {
+ rv = MO.getImm();
+ } else if (MO.isGlobal() || MO.isSymbol() ||
+ MO.isCPI() || MO.isJTI()) {
+ unsigned Reloc = 0;
+ if (MI.getOpcode() == PPC::BL_Macho || MI.getOpcode() == PPC::BL8_Macho ||
+ MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF ||
+ MI.getOpcode() == PPC::TAILB || MI.getOpcode() == PPC::TAILB8)
+ Reloc = PPC::reloc_pcrel_bx;
+ else {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
+ }
+ switch (MI.getOpcode()) {
+ default: MI.dump(); assert(0 && "Unknown instruction for relocation!");
+ case PPC::LIS:
+ case PPC::LIS8:
+ case PPC::ADDIS:
+ case PPC::ADDIS8:
+ Reloc = PPC::reloc_absolute_high; // Pointer to symbol
+ break;
+ case PPC::LI:
+ case PPC::LI8:
+ case PPC::LA:
+ // Loads.
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ case PPC::LFS:
+ case PPC::LFD:
+
+ // Stores.
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ case PPC::STFS:
+ case PPC::STFD:
+ Reloc = PPC::reloc_absolute_low;
+ break;
+
+ case PPC::LWA:
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::STD_32:
+ Reloc = PPC::reloc_absolute_low_ix;
+ break;
+ }
+ }
+
+ MachineRelocation R;
+ if (MO.isGlobal()) {
+ R = MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ MO.getGlobal(), 0,
+ isa<Function>(MO.getGlobal()));
+ } else if (MO.isSymbol()) {
+ R = MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, MO.getSymbolName(), 0);
+ } else if (MO.isCPI()) {
+ R = MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, MO.getIndex(), 0);
+ } else {
+ assert(MO.isJTI());
+ R = MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, MO.getIndex(), 0);
+ }
+
+ // If in PIC mode, we need to encode the negated address of the
+ // 'movepctolr' into the unrelocated field. After relocation, we'll have
+ // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
+ // field, we get &gv. This doesn't happen for branch relocations, which are
+ // always implicitly pc relative.
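+    // (Note: the bl emitted for MovePCtoLR captures the address of the
+    // instruction after it, i.e. &movepctolr+4, in LR, which is why the
+    // extra -4 is folded into the constant here.)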
+ if (TM.getRelocationModel() == Reloc::PIC_ && Reloc != PPC::reloc_pcrel_bx){
+ assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
+ R.setConstantVal(-(intptr_t)MovePCtoLROffset - 4);
+ }
+ MCE.addRelocation(R);
+
+ } else if (MO.isMBB()) {
+ unsigned Reloc = 0;
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == PPC::B || Opcode == PPC::BL_Macho ||
+ Opcode == PPC::BLA_Macho || Opcode == PPC::BL_ELF ||
+ Opcode == PPC::BLA_ELF)
+ Reloc = PPC::reloc_pcrel_bx;
+ else // BCC instruction
+ Reloc = PPC::reloc_pcrel_bcx;
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, MO.getMBB()));
+ } else {
+ cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n";
+ abort();
+ }
+
+ return rv;
+}
+
+#include "PPCGenCodeEmitter.inc"
+
diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h
new file mode 100644
index 000000000000..1b5893da0ce2
--- /dev/null
+++ b/lib/Target/PowerPC/PPCFrameInfo.h
@@ -0,0 +1,93 @@
+//===-- PPCFrameInfo.h - Define TargetFrameInfo for PowerPC -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_FRAMEINFO_H
+#define POWERPC_FRAMEINFO_H
+
+#include "PPC.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class PPCFrameInfo: public TargetFrameInfo {
+ const TargetMachine &TM;
+
+public:
+ PPCFrameInfo(const TargetMachine &tm, bool LP64)
+ : TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), TM(tm) {
+ }
+
+ /// getReturnSaveOffset - Return the previous frame offset to save the
+ /// return address.
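+  ///
+  /// (Illustrative note: the Macho linkage area consists of six pointer-sized
+  /// slots (back chain, CR save, LR save, two reserved words, and the TOC
+  /// save), so the LR slot sits at offset 8 (32-bit) or 16 (64-bit) from the
+  /// previous stack pointer.)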
+ static unsigned getReturnSaveOffset(bool LP64, bool isMacho) {
+ if (isMacho)
+ return LP64 ? 16 : 8;
+ // For ELF 32 ABI:
+ return 4;
+ }
+
+ /// getFramePointerSaveOffset - Return the previous frame offset to save the
+ /// frame pointer.
+ static unsigned getFramePointerSaveOffset(bool LP64, bool isMacho) {
+ // For MachO ABI:
+ // Use the TOC save slot in the PowerPC linkage area for saving the frame
+ // pointer (if needed.) LLVM does not generate code that uses the TOC (R2
+ // is treated as a caller saved register.)
+ if (isMacho)
+ return LP64 ? 40 : 20;
+
+ // For ELF 32 ABI:
+ // Save it right before the link register
+ return -4U;
+ }
+
+ /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
+ ///
+ static unsigned getLinkageSize(bool LP64, bool isMacho) {
+ if (isMacho)
+ return 6 * (LP64 ? 8 : 4);
+
+ // For ELF 32 ABI:
+ return 8;
+ }
+
+  /// getMinCallArgumentsSize - Return the size of the minimum PowerPC ABI
+ /// argument area.
+ static unsigned getMinCallArgumentsSize(bool LP64, bool isMacho) {
+ // For Macho ABI:
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if the callee
+  // is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ if (isMacho)
+ return 8 * (LP64 ? 8 : 4);
+
+ // For ELF 32 ABI:
+ // There is no default stack allocated for the 8 first GPR arguments.
+ return 0;
+ }
+
+ /// getMinCallFrameSize - Return the minimum size a call frame can be using
+ /// the PowerPC ABI.
+ static unsigned getMinCallFrameSize(bool LP64, bool isMacho) {
+ // The call frame needs to be at least big enough for linkage and 8 args.
+ return getLinkageSize(LP64, isMacho) +
+ getMinCallArgumentsSize(LP64, isMacho);
+ }
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
new file mode 100644
index 000000000000..e7658fc9d4ae
--- /dev/null
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -0,0 +1,304 @@
+//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "PPCHazardRecognizers.h"
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// PowerPC 970 Hazard Recognizer
+//
+// This models the dispatch group formation of the PPC970 processor. Dispatch
+// groups are bundles of up to five instructions that can contain various mixes
+// of instructions. The PPC970 can dispatch at most four non-branch
+// instructions and one branch instruction per cycle.
+//
+// There are a number of restrictions on dispatch group formation: some
+// instructions can only be issued in the first slot of a dispatch group, and
+// some instructions fill an entire dispatch group. Additionally, only branches
+// can issue in the 5th (last) slot.
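+//
+// For example, a dispatch group might look like { add, lwz, fadd, rlwinm, bc }:
+// four non-branch instructions in slots 0-3 and the branch in slot 4. A
+// "first" or "single" class instruction (such as mtspr) would instead have to
+// start a new group.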
+//
+// Finally, there are a number of "structural" hazards on the PPC970. These
+// conditions cause large performance penalties due to misprediction, recovery,
+// and replay logic that has to happen. These cases include setting a CTR and
+// branching through it in the same dispatch group, and storing to an address,
+// then loading from the same address within a dispatch group. To avoid these
+// conditions, we insert no-op instructions when appropriate.
+//
+// FIXME: This is missing some significant cases:
+// 1. Modeling of microcoded instructions.
+// 2. Handling of serialized operations.
+// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
+//
+
+PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
+ : TII(tii) {
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::EndDispatchGroup() {
+ DOUT << "=== Start of dispatch group\n";
+ NumIssued = 0;
+
+ // Structural hazard info.
+ HasCTRSet = false;
+ NumStores = 0;
+}
+
+
+PPCII::PPC970_Unit
+PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,
+ bool &isCracked,
+ bool &isLoad, bool &isStore) {
+ if ((int)Opcode >= 0) {
+ isFirst = isSingle = isCracked = isLoad = isStore = false;
+ return PPCII::PPC970_Pseudo;
+ }
+ Opcode = ~Opcode;
+
+ const TargetInstrDesc &TID = TII.get(Opcode);
+
+ isLoad = TID.mayLoad();
+ isStore = TID.mayStore();
+
+ unsigned TSFlags = TID.TSFlags;
+
+ isFirst = TSFlags & PPCII::PPC970_First;
+ isSingle = TSFlags & PPCII::PPC970_Single;
+ isCracked = TSFlags & PPCII::PPC970_Cracked;
+ return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
+}
+
+/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
+/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
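+/// (For example, a 4-byte store at [r+8] overlaps an 8-byte load at [r+4]:
+/// the load covers bytes 4..11 and the store covers bytes 8..11.)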
+bool PPCHazardRecognizer970::
+isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
+ for (unsigned i = 0, e = NumStores; i != e; ++i) {
+ // Handle exact and commuted addresses.
+ if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i])
+ return true;
+ if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
+ return true;
+
+ // Okay, we don't have an exact match, if this is an indexed offset, see if
+ // we have overlap (which happens during fp->int conversion for example).
+ if (StorePtr2[i] == Ptr2) {
+ if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i]))
+ if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) {
+ // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ int StoreOffs = StoreOffset->getZExtValue();
+ int LoadOffs = LoadOffset->getZExtValue();
+ if (StoreOffs < LoadOffs) {
+ if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true;
+ } else {
+ if (int(LoadOffs+LoadSize) > StoreOffs) return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+/// getHazardType - We return Hazard for any non-branch instruction that would
+/// terminate the dispatch group. We return NoopHazard for any instruction
+/// that would not terminate the dispatch group by itself but would cause a
+/// pipeline flush.
+ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
+getHazardType(SUnit *SU) {
+ const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
+ unsigned Opcode = Node->getMachineOpcode();
+
+ // We can only issue a PPC970_First/PPC970_Single instruction (such as
+ // crand/mtspr/etc) if this is the first cycle of the dispatch group.
+ if (NumIssued != 0 && (isFirst || isSingle))
+ return Hazard;
+
+ // If this instruction is cracked into two ops by the decoder, we know that
+ // it is not a branch and that it cannot issue if 3 other instructions are
+ // already in the dispatch group.
+ if (isCracked && NumIssued > 2)
+ return Hazard;
+
+ switch (InstrType) {
+ default: assert(0 && "Unknown instruction type!");
+ case PPCII::PPC970_FXU:
+ case PPCII::PPC970_LSU:
+ case PPCII::PPC970_FPU:
+ case PPCII::PPC970_VALU:
+ case PPCII::PPC970_VPERM:
+ // We can only issue a branch as the last instruction in a group.
+ if (NumIssued == 4) return Hazard;
+ break;
+ case PPCII::PPC970_CRU:
+ // We can only issue a CR instruction in the first two slots.
+ if (NumIssued >= 2) return Hazard;
+ break;
+ case PPCII::PPC970_BRU:
+ break;
+ }
+
+ // Do not allow MTCTR and BCTRL to be in the same dispatch group.
+ if (HasCTRSet && (Opcode == PPC::BCTRL_Macho || Opcode == PPC::BCTRL_ELF))
+ return NoopHazard;
+
+ // If this is a load following a store, make sure it's not to the same or
+ // overlapping address.
+ if (isLoad && NumStores) {
+ unsigned LoadSize;
+ switch (Opcode) {
+ default: assert(0 && "Unknown load!");
+ case PPC::LBZ: case PPC::LBZU:
+ case PPC::LBZX:
+ case PPC::LBZ8: case PPC::LBZU8:
+ case PPC::LBZX8:
+ case PPC::LVEBX:
+ LoadSize = 1;
+ break;
+ case PPC::LHA: case PPC::LHAU:
+ case PPC::LHAX:
+ case PPC::LHZ: case PPC::LHZU:
+ case PPC::LHZX:
+ case PPC::LVEHX:
+ case PPC::LHBRX:
+ case PPC::LHA8: case PPC::LHAU8:
+ case PPC::LHAX8:
+ case PPC::LHZ8: case PPC::LHZU8:
+ case PPC::LHZX8:
+ LoadSize = 2;
+ break;
+ case PPC::LFS: case PPC::LFSU:
+ case PPC::LFSX:
+ case PPC::LWZ: case PPC::LWZU:
+ case PPC::LWZX:
+ case PPC::LWA:
+ case PPC::LWAX:
+ case PPC::LVEWX:
+ case PPC::LWBRX:
+ case PPC::LWZ8:
+ case PPC::LWZX8:
+ LoadSize = 4;
+ break;
+ case PPC::LFD: case PPC::LFDU:
+ case PPC::LFDX:
+ case PPC::LD: case PPC::LDU:
+ case PPC::LDX:
+ LoadSize = 8;
+ break;
+ case PPC::LVX:
+ case PPC::LVXL:
+ LoadSize = 16;
+ break;
+ }
+
+ if (isLoadOfStoredAddress(LoadSize,
+ Node->getOperand(0), Node->getOperand(1)))
+ return NoopHazard;
+ }
+
+ return NoHazard;
+}
+
+void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
+ const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return;
+ unsigned Opcode = Node->getMachineOpcode();
+
+ // Update structural hazard information.
+ if (Opcode == PPC::MTCTR) HasCTRSet = true;
+
+ // Track the address stored to.
+ if (isStore) {
+ unsigned ThisStoreSize;
+ switch (Opcode) {
+ default: assert(0 && "Unknown store instruction!");
+ case PPC::STB: case PPC::STB8:
+ case PPC::STBU: case PPC::STBU8:
+ case PPC::STBX: case PPC::STBX8:
+ case PPC::STVEBX:
+ ThisStoreSize = 1;
+ break;
+ case PPC::STH: case PPC::STH8:
+ case PPC::STHU: case PPC::STHU8:
+ case PPC::STHX: case PPC::STHX8:
+ case PPC::STVEHX:
+ case PPC::STHBRX:
+ ThisStoreSize = 2;
+ break;
+ case PPC::STFS:
+ case PPC::STFSU:
+ case PPC::STFSX:
+ case PPC::STWX: case PPC::STWX8:
+ case PPC::STWUX:
+ case PPC::STW: case PPC::STW8:
+ case PPC::STWU: case PPC::STWU8:
+ case PPC::STVEWX:
+ case PPC::STFIWX:
+ case PPC::STWBRX:
+ ThisStoreSize = 4;
+ break;
+ case PPC::STD_32:
+ case PPC::STDX_32:
+ case PPC::STD:
+ case PPC::STDU:
+ case PPC::STFD:
+ case PPC::STFDX:
+ case PPC::STDX:
+ case PPC::STDUX:
+ ThisStoreSize = 8;
+ break;
+ case PPC::STVX:
+ case PPC::STVXL:
+ ThisStoreSize = 16;
+ break;
+ }
+
+ StoreSize[NumStores] = ThisStoreSize;
+ StorePtr1[NumStores] = Node->getOperand(1);
+ StorePtr2[NumStores] = Node->getOperand(2);
+ ++NumStores;
+ }
+
+ if (InstrType == PPCII::PPC970_BRU || isSingle)
+ NumIssued = 4; // Terminate a d-group.
+ ++NumIssued;
+
+ // If this instruction is cracked into two ops by the decoder, remember that
+ // we issued two pieces.
+ if (isCracked)
+ ++NumIssued;
+
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::AdvanceCycle() {
+ assert(NumIssued < 5 && "Illegal dispatch group!");
+ ++NumIssued;
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
new file mode 100644
index 000000000000..74bf8e52d8fa
--- /dev/null
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -0,0 +1,73 @@
+//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCHAZRECS_H
+#define PPCHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "PPCInstrInfo.h"
+
+namespace llvm {
+
+/// PPCHazardRecognizer970 - This class defines a finite state automaton that
+/// models the dispatch logic on the PowerPC 970 (aka G5) processor. It
+/// promotes good dispatch group formation and implements noop insertion to
+/// avoid structural hazards that cause significant performance penalties, e.g.
+/// setting the CTR register and then branching through it, or storing to and
+/// then loading from the same address, within a single dispatch group.
+class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
+ const TargetInstrInfo &TII;
+
+ unsigned NumIssued; // Number of insts issued, including advanced cycles.
+
+ // Various things that can cause a structural hazard.
+
+ // HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
+ bool HasCTRSet;
+
+  // StorePtr1/StorePtr2 - Keep track of the address of any store. If we see a
+  // load from the same address (or one that overlaps it), force the load into
+  // a later dispatch group. We can have up to four stores in one dispatch
+  // group, hence we track up to 4.
+ //
+ // This is null if we haven't seen a store yet. We keep track of both
+ // operands of the store here, since we support [r+r] and [r+i] addressing.
+ SDValue StorePtr1[4], StorePtr2[4];
+ unsigned StoreSize[4];
+ unsigned NumStores;
+
+public:
+ PPCHazardRecognizer970(const TargetInstrInfo &TII);
+ virtual HazardType getHazardType(SUnit *SU);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+
+private:
+ /// EndDispatchGroup - Called when we are finishing a new dispatch group.
+ ///
+ void EndDispatchGroup();
+
+ /// GetInstrType - Classify the specified powerpc opcode according to its
+ /// pipeline.
+ PPCII::PPC970_Unit GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,bool &isCracked,
+ bool &isLoad, bool &isStore);
+
+ bool isLoadOfStoredAddress(unsigned LoadSize,
+ SDValue Ptr1, SDValue Ptr2) const;
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
new file mode 100644
index 000000000000..823e3162191e
--- /dev/null
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -0,0 +1,1170 @@
+//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for PowerPC,
+// converting from a legalized dag to a PPC dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppc-codegen"
+#include "PPC.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCISelLowering.h"
+#include "PPCHazardRecognizers.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// PPCDAGToDAGISel - PPC specific code to select PPC machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class VISIBILITY_HIDDEN PPCDAGToDAGISel : public SelectionDAGISel {
+ PPCTargetMachine &TM;
+ PPCTargetLowering &PPCLowering;
+ const PPCSubtarget &PPCSubTarget;
+ unsigned GlobalBaseReg;
+ public:
+ explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
+ : SelectionDAGISel(tm), TM(tm),
+ PPCLowering(*TM.getTargetLowering()),
+ PPCSubTarget(*TM.getSubtargetImpl()) {}
+
+ virtual bool runOnFunction(Function &Fn) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (Fn.hasAvailableExternallyLinkage())
+ return false;
+
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnFunction(Fn);
+
+ InsertVRSaveCode(Fn);
+ return true;
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy());
+ }
+
+ /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
+ /// with any number of 0s on either side. The 1s are allowed to wrap from
+    /// LSB to MSB, so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
+ /// 0x0F0F0000 is not, since all 1s are not contiguous.
+ static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
+
+
+ /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
+ /// rotate and mask opcode and mask operation.
+ static bool isRotateAndMask(SDNode *N, unsigned Mask, bool IsShiftMask,
+ unsigned &SH, unsigned &MB, unsigned &ME);
+
+ /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
+ /// base register. Return the virtual register that holds this value.
+ SDNode *getGlobalBaseReg();
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDValue Op);
+
+ SDNode *SelectBitfieldInsert(SDNode *N);
+
+ /// SelectCC - Select a comparison of the specified values with the
+ /// specified condition code, returning the CR# of the expression.
+ SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, DebugLoc dl);
+
+ /// SelectAddrImm - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement [r+imm].
+ bool SelectAddrImm(SDValue Op, SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG);
+ }
+
+ /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
+ /// immediate field. Because preinc imms have already been validated, just
+ /// accept it.
+ bool SelectAddrImmOffs(SDValue Op, SDValue N, SDValue &Out) const {
+ Out = N;
+ return true;
+ }
+
+    /// SelectAddrIdx - Given the specified address, check to see if it can be
+    /// represented as an indexed [r+r] operation. Returns false if it can
+    /// be represented by [r+imm], which is preferred.
+ bool SelectAddrIdx(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
+ }
+
+    /// SelectAddrIdxOnly - Given the specified address, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddrIdxOnly(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrImmShift - Returns true if the address N can be represented by
+ /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable
+ /// for use by STD and friends.
+ bool SelectAddrImmShift(SDValue Op, SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddrIdx(Op, Op, Op0, Op1))
+ SelectAddrImm(Op, Op, Op0, Op1);
+ break;
+ case 'o': // offsetable
+ if (!SelectAddrImm(Op, Op, Op0, Op1)) {
+ Op0 = Op;
+ Op1 = getSmallIPtrImm(0);
+ }
+ break;
+ case 'v': // not offsetable
+ SelectAddrIdxOnly(Op, Op, Op0, Op1);
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+ }
+
+ SDValue BuildSDIVSequence(SDNode *N);
+ SDValue BuildUDIVSequence(SDNode *N);
+
+ /// InstructionSelect - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelect();
+
+ void InsertVRSaveCode(Function &Fn);
+
+ virtual const char *getPassName() const {
+ return "PowerPC DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+ /// this target when scheduling the DAG.
+ virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() {
+ // Should use subtarget info to pick the right hazard recognizer. For
+ // now, always return a PPC970 recognizer.
+ const TargetInstrInfo *II = TM.getInstrInfo();
+ assert(II && "No InstrInfo?");
+ return new PPCHazardRecognizer970(*II);
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "PPCGenDAGISel.inc"
+
+private:
+ SDNode *SelectSETCC(SDValue Op);
+ };
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void PPCDAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+/// InsertVRSaveCode - Once the entire function has been instruction selected,
+/// all virtual registers are created and all machine instructions are built,
+/// check to see if we need to save/restore VRSAVE. If so, do it.
+void PPCDAGToDAGISel::InsertVRSaveCode(Function &F) {
+ // Check to see if this function uses vector registers, which means we have to
+ // save and restore the VRSAVE register and update it with the regs we use.
+ //
+  // In this case, there will be virtual registers of vector type created
+ // by the scheduler. Detect them now.
+ MachineFunction &Fn = MachineFunction::get(&F);
+ bool HasVectorVReg = false;
+ for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
+ e = RegInfo->getLastVirtReg()+1; i != e; ++i)
+ if (RegInfo->getRegClass(i) == &PPC::VRRCRegClass) {
+ HasVectorVReg = true;
+ break;
+ }
+ if (!HasVectorVReg) return; // nothing to do.
+
+ // If we have a vector register, we want to emit code into the entry and exit
+ // blocks to save and restore the VRSAVE register. We do this here (instead
+ // of marking all vector instructions as clobbering VRSAVE) for two reasons:
+ //
+ // 1. This (trivially) reduces the load on the register allocator, by not
+ // having to represent the live range of the VRSAVE register.
+ // 2. This (more significantly) allows us to create a temporary virtual
+ // register to hold the saved VRSAVE value, allowing this temporary to be
+ // register allocated, instead of forcing it to be spilled to the stack.
+
+ // Create two vregs - one to hold the VRSAVE register that is live-in to the
+ // function and one for the value after having bits or'd into it.
+ unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock &EntryBB = *Fn.begin();
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Emit the following code into the entry block:
+ // InVRSAVE = MFVRSAVE
+ // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
+ // MTVRSAVE UpdatedVRSAVE
+ MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
+ UpdatedVRSAVE).addReg(InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
+
+ // Find all return blocks, outputting a restore in each epilog.
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ IP = BB->end(); --IP;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = IP;
+ while (I2 != BB->begin() && (--I2)->getDesc().isTerminator())
+ IP = I2;
+
+ // Emit: MTVRSAVE InVRSave
+ BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
+ }
+ }
+}
+
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// base address to use for accessing globals into a register.
+///
+SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
+ if (!GlobalBaseReg) {
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = BB->getParent()->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+
+ if (PPCLowering.getPointerTy() == MVT::i32) {
+ GlobalBaseReg = RegInfo->createVirtualRegister(PPC::GPRCRegisterClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR), PPC::LR);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ } else {
+ GlobalBaseReg = RegInfo->createVirtualRegister(PPC::G8RCRegisterClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8), PPC::LR8);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
+ }
+ }
+ return CurDAG->getRegister(GlobalBaseReg,
+ PPCLowering.getPointerTy()).getNode();
+}
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
+/// operand. If so Imm will receive the 64-bit value.
+static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if the operand is a 32-bit
+// constant. If so, Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has an immediate integer right operand.
+// If so, Imm will receive the 32-bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc
+ && isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
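+// (Worked example: Val = 0x00FFF000 gives MB = CountLeadingZeros_32(Val) = 8
+// and ME = CountLeadingZeros_32((Val - 1) ^ Val) = CountLeadingZeros_32(0x00001FFF)
+// = 19, so the run covers bits 8..19 in rlwinm's big-endian bit numbering.)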
+bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = CountLeadingZeros_32(Val);
+ // look for the first zero bit after the run of ones
+ ME = CountLeadingZeros_32((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = CountLeadingZeros_32(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = CountLeadingZeros_32((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
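+// (Illustrative case: for (and (srl X, 16), 0xFFFF), isRotateAndMask is called
+// on the srl node with Mask = 0xFFFF and IsShiftMask = false; it computes
+// SH = 16, MB = 16, ME = 31, so the pair folds to a single rlwinm, which is
+// srwi X, 16.)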
+bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
+ bool IsShiftMask, unsigned &SH,
+ unsigned &MB, unsigned &ME) {
+ // Don't even go down this path for i64, since different logic will be
+ // necessary for rldicl/rldicr/rldimi.
+ if (N->getValueType(0) != MVT::i32)
+ return false;
+
+ unsigned Shift = 32;
+ unsigned Indeterminant = ~0; // bit mask marking indeterminant results
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() != 2 ||
+ !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
+ return false;
+
+ if (Opcode == ISD::SHL) {
+ // apply shift left to mask if it comes first
+ if (IsShiftMask) Mask = Mask << Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu << Shift);
+ } else if (Opcode == ISD::SRL) {
+ // apply shift right to mask if it comes first
+ if (IsShiftMask) Mask = Mask >> Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu >> Shift);
+ // adjust for the left rotate
+ Shift = 32 - Shift;
+ } else if (Opcode == ISD::ROTL) {
+ Indeterminant = 0;
+ } else {
+ return false;
+ }
+
+ // if the mask doesn't intersect any Indeterminant bits
+ if (Mask && !(Mask & Indeterminant)) {
+ SH = Shift & 31;
+ // make sure the mask is still a mask (wrap arounds may not be)
+ return isRunOfOnes(Mask, MB, ME);
+ }
+ return false;
+}
+
+/// SelectBitfieldInsert - turn an or of two masked values into
+/// the rotate left word immediate then mask insert (rlwimi) instruction.
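+/// For example, (or (and x, 0xFFFFFF00), (and y, 0x000000FF)) can be selected
+/// as rlwimi x, y, 0, 24, 31, inserting the low byte of y into x.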
+SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ APInt LKZ, LKO, RKZ, RKO;
+ CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO);
+ CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO);
+
+ unsigned TargetMask = LKZ.getZExtValue();
+ unsigned InsertMask = RKZ.getZExtValue();
+
+ if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
+ unsigned Op0Opc = Op0.getOpcode();
+ unsigned Op1Opc = Op1.getOpcode();
+ unsigned Value, SH = 0;
+ TargetMask = ~TargetMask;
+ InsertMask = ~InsertMask;
+
+ // If the LHS has a foldable shift and the RHS does not, then swap it to the
+ // RHS so that we can fold the shift into the insert.
+ if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
+ if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
+ Op0.getOperand(0).getOpcode() == ISD::SRL) {
+ if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+ } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
+ if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+
+ unsigned MB, ME;
+ if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) {
+ SDValue Tmp1, Tmp2, Tmp3;
+ bool DisjointMask = (TargetMask ^ InsertMask) == 0xFFFFFFFF;
+
+ if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0);
+ SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
+ }
+ if (Op1Opc == ISD::AND) {
+ unsigned SHOpc = Op1.getOperand(0).getOpcode();
+ if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0).getOperand(0);
+ SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
+ } else {
+ Op1 = Op1.getOperand(0);
+ }
+ }
+
+ Tmp3 = (Op0Opc == ISD::AND && DisjointMask) ? Op0.getOperand(0) : Op0;
+ SH &= 31;
+ SDValue Ops[] = { Tmp3, Op1, getI32Imm(SH), getI32Imm(MB),
+ getI32Imm(ME) };
+ return CurDAG->getTargetNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ }
+ }
+ return 0;
+}
+
+/// SelectCC - Select a comparison of the specified values with the specified
+/// condition code, returning the CR# of the expression.
+SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, DebugLoc dl) {
+ // Always select the LHS.
+ unsigned Opc;
+
+ if (LHS.getValueType() == MVT::i32) {
+ unsigned Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt32Immediate(RHS, Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt16(Imm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt16((int)Imm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpw cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmplwi cr0,r0,0x5678
+ // beq cr0,L6
+ SDValue Xor(CurDAG->getTargetNode(PPC::XORIS, dl, MVT::i32, LHS,
+ getI32Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, Xor,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ }
+ Opc = PPC::CMPLW;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt32Immediate(RHS, Imm) && isUInt16(Imm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLW;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm((int)SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPW;
+ }
+ } else if (LHS.getValueType() == MVT::i64) {
+ uint64_t Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt64Immediate(RHS.getNode(), Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt16(Imm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt16(Imm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpd cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmpldi cr0,r0,0x5678
+ // beq cr0,L6
+ if (isUInt32(Imm)) {
+ SDValue Xor(CurDAG->getTargetNode(PPC::XORIS8, dl, MVT::i64, LHS,
+ getI64Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, Xor,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ }
+ }
+ Opc = PPC::CMPLD;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt64Immediate(RHS.getNode(), Imm) && isUInt16(Imm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLD;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getTargetNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI64Imm(SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPD;
+ }
+ } else if (LHS.getValueType() == MVT::f32) {
+ Opc = PPC::FCMPUS;
+ } else {
+ assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
+ Opc = PPC::FCMPUD;
+ }
+ return SDValue(CurDAG->getTargetNode(Opc, dl, MVT::i32, LHS, RHS), 0);
+}
+
+static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETUEQ:
+ case ISD::SETONE:
+ case ISD::SETOLE:
+ case ISD::SETOGE:
+ assert(0 && "Should be lowered by legalize!");
+ default: assert(0 && "Unknown condition!"); abort();
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return PPC::PRED_EQ;
+ case ISD::SETUNE:
+ case ISD::SETNE: return PPC::PRED_NE;
+ case ISD::SETOLT:
+ case ISD::SETLT: return PPC::PRED_LT;
+ case ISD::SETULE:
+ case ISD::SETLE: return PPC::PRED_LE;
+ case ISD::SETOGT:
+ case ISD::SETGT: return PPC::PRED_GT;
+ case ISD::SETUGE:
+ case ISD::SETGE: return PPC::PRED_GE;
+ case ISD::SETO: return PPC::PRED_NU;
+ case ISD::SETUO: return PPC::PRED_UN;
+ // These two are invalid for floating point. Assume we have int.
+ case ISD::SETULT: return PPC::PRED_LT;
+ case ISD::SETUGT: return PPC::PRED_GT;
+ }
+}
+
+/// getCRIdxForSetCC - Return the index of the condition register field
+/// associated with the SetCC condition, and whether or not the field is
+/// treated as inverted. That is, lt = 0; ge = 0 inverted.
+///
+/// If this returns with Other != -1, then the returned comparison is an or of
+/// two simpler comparisons. In this case, Invert is guaranteed to be false.
+static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
+ Invert = false;
+ Other = -1;
+ switch (CC) {
+ default: assert(0 && "Unknown condition!"); abort();
+ case ISD::SETOLT:
+ case ISD::SETLT: return 0; // Bit #0 = SETOLT
+ case ISD::SETOGT:
+ case ISD::SETGT: return 1; // Bit #1 = SETOGT
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
+ case ISD::SETUO: return 3; // Bit #3 = SETUO
+ case ISD::SETUGE:
+ case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
+ case ISD::SETULE:
+ case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
+ case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
+ case ISD::SETUEQ:
+ case ISD::SETOGE:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ assert(0 && "Invalid branch code: should be expanded by legalize");
+ // These are invalid for floating point. Assume integer.
+ case ISD::SETULT: return 0;
+ case ISD::SETUGT: return 1;
+ }
+ return 0;
+}
+
+SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+ unsigned Imm;
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ // We can codegen setcc op, imm very efficiently compared to a brcond.
+ // Check for those cases here.
+ // setcc op, 0
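+    // (For instance, the SETEQ-with-zero case below computes (cntlzw op) >> 5:
+    // cntlzw yields 32 only when op is zero, so the shifted value is exactly
+    // the 0/1 result of the comparison.)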
+ if (Imm == 0) {
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ: {
+ Op = SDValue(CurDAG->getTargetNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
+ SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETNE: {
+ SDValue AD =
+ SDValue(CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ Op, getI32Imm(~0U)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
+ AD.getValue(1));
+ }
+ case ISD::SETLT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDValue T =
+ SDValue(CurDAG->getTargetNode(PPC::NEG, dl, MVT::i32, Op), 0);
+ T = SDValue(CurDAG->getTargetNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
+ SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ }
+ } else if (Imm == ~0U) { // setcc op, -1
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ:
+ Op = SDValue(CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ Op, getI32Imm(1)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(CurDAG->getTargetNode(PPC::LI, dl,
+ MVT::i32,
+ getI32Imm(0)), 0),
+ Op.getValue(1));
+ case ISD::SETNE: {
+ Op = SDValue(CurDAG->getTargetNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
+ SDNode *AD = CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ Op, getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
+ Op, SDValue(AD, 1));
+ }
+ case ISD::SETLT: {
+ SDValue AD = SDValue(CurDAG->getTargetNode(PPC::ADDI, dl, MVT::i32, Op,
+ getI32Imm(1)), 0);
+ SDValue AN = SDValue(CurDAG->getTargetNode(PPC::AND, dl, MVT::i32, AD,
+ Op), 0);
+ SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ case ISD::SETGT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ Op = SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
+ getI32Imm(1));
+ }
+ }
+ }
+ }
+
+ bool Inv;
+ int OtherCondIdx;
+ unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx);
+ SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+ SDValue IntCR;
+
+ // Force the ccreg into CR7.
+ SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
+
+ SDValue InFlag(0, 0); // Null incoming flag value.
+ CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
+ InFlag).getValue(1);
+
+ if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
+ IntCR = SDValue(CurDAG->getTargetNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
+ CCReg), 0);
+ else
+ IntCR = SDValue(CurDAG->getTargetNode(PPC::MFCR, dl, MVT::i32, CCReg), 0);
+
+ SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
+ getI32Imm(31), getI32Imm(31) };
+ if (OtherCondIdx == -1 && !Inv)
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+
+ // Get the specified bit.
+ SDValue Tmp =
+ SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+ if (Inv) {
+ assert(OtherCondIdx == -1 && "Can't have split plus negation");
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
+ }
+
+ // Otherwise, we have to turn an operation like SETONE -> SETOLT | SETOGT.
+ // We already got the bit for the first part of the comparison (e.g. SETULE).
+
+ // Get the other bit of the comparison.
+ Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
+ SDValue OtherCond =
+ SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+
+ return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
+}
+
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *PPCDAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = Op.getDebugLoc();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+
+ case ISD::Constant: {
+ if (N->getValueType(0) == MVT::i64) {
+ // Get 64 bit value.
+ int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ // Assume no remaining bits.
+ unsigned Remainder = 0;
+ // Assume no shift required.
+ unsigned Shift = 0;
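+      // (Illustrative walk-through: Imm = 0x123456789ABCDEF0 still does not
+      // fit in 32 bits after shifting out its 4 trailing zero bits, so we keep
+      // Remainder = Imm, set Shift = 32, build the high word with LIS8 0x1234
+      // and ORI8 0x5678, shift it left 32 with RLDICR, then OR in the low word
+      // with ORIS8 0x9ABC and ORI8 0xDEF0, five instructions in total.)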
+
+ // If it can't be represented as a 32 bit value.
+ if (!isInt32(Imm)) {
+ Shift = CountTrailingZeros_64(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ // If the shifted value fits 32 bits.
+ if (isInt32(ImmSh)) {
+ // Go with the shifted value.
+ Imm = ImmSh;
+ } else {
+ // Still stuck with a 64 bit value.
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Intermediate operand.
+ SDNode *Result;
+
+ // Handle first 32 bits.
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ // Simple value.
+ if (isInt16(Imm)) {
+ // Just the Lo bits.
+ Result = CurDAG->getTargetNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
+ } else if (Lo) {
+ // Handle the Hi bits.
+ unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getTargetNode(OpC, dl, MVT::i64, getI32Imm(Hi));
+ // And Lo bits.
+ Result = CurDAG->getTargetNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ } else {
+ // Just the Hi bits.
+ Result = CurDAG->getTargetNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
+ }
+
+ // If no shift, we're done.
+ if (!Shift) return Result;
+
+ // Shift for next step if the upper 32-bits were not zero.
+ if (Imm) {
+ Result = CurDAG->getTargetNode(PPC::RLDICR, dl, MVT::i64,
+ SDValue(Result, 0),
+ getI32Imm(Shift), getI32Imm(63 - Shift));
+ }
+
+ // Add in the last bits as required.
+ if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+ Result = CurDAG->getTargetNode(PPC::ORIS8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Hi));
+ }
+ if ((Lo = Remainder & 0xFFFF)) {
+ Result = CurDAG->getTargetNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ }
+
+ return Result;
+ }
+ break;
+ }
+
+ case ISD::SETCC:
+ return SelectSETCC(Op);
+ case PPCISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, Op.getValueType());
+ unsigned Opc = Op.getValueType() == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, Opc, Op.getValueType(), TFI,
+ getSmallIPtrImm(0));
+ return CurDAG->getTargetNode(Opc, dl, Op.getValueType(), TFI,
+ getSmallIPtrImm(0));
+ }
+
+ case PPCISD::MFCR: {
+ SDValue InFlag = N->getOperand(1);
+ // Use MFOCRF if supported.
+ if (PPCSubTarget.isGigaProcessor())
+ return CurDAG->getTargetNode(PPC::MFOCRF, dl, MVT::i32,
+ N->getOperand(0), InFlag);
+ else
+ return CurDAG->getTargetNode(PPC::MFCR, dl, MVT::i32, InFlag);
+ }
+
+ case ISD::SDIV: {
+ // FIXME: since this depends on the setting of the carry flag from the srawi
+ // we should really be making notes about that for the scheduler.
+ // FIXME: It sure would be nice if we could cheaply recognize the
+ // srl/add/sra pattern the dag combiner will generate for this as
+ // sra/addze rather than having to handle sdiv ourselves. oh well.
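+    // For example, (sdiv X, 4) is selected below as "srawi T, X, 2" followed
+    // by "addze R, T": srawi sets the carry when X is negative and ones were
+    // shifted out, and addze adds it back so the result rounds toward zero.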
+ unsigned Imm;
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ SDValue N0 = N->getOperand(0);
+ if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
+ SDNode *Op =
+ CurDAG->getTargetNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag,
+ N0, getI32Imm(Log2_32(Imm)));
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1));
+ } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
+ SDNode *Op =
+ CurDAG->getTargetNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag,
+ N0, getI32Imm(Log2_32(-Imm)));
+ SDValue PT =
+ SDValue(CurDAG->getTargetNode(PPC::ADDZE, dl, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1)),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+
+ case ISD::LOAD: {
+ // Handle preincrement loads.
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ MVT LoadedVT = LD->getMemoryVT();
+
+ // Normal loads are handled by code generated from the .td file.
+ if (LD->getAddressingMode() != ISD::PRE_INC)
+ break;
+
+ SDValue Offset = LD->getOffset();
+ if (isa<ConstantSDNode>(Offset) ||
+ Offset.getOpcode() == ISD::TargetGlobalAddress) {
+
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT()) {
+ default: assert(0 && "Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDU; break;
+ case MVT::f32: Opcode = PPC::LFSU; break;
+ case MVT::i32: Opcode = PPC::LWZU; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT()) {
+ default: assert(0 && "Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDU; break;
+ case MVT::i32: Opcode = PPC::LWZU8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Offset, Base, Chain };
+ // FIXME: PPC64
+ return CurDAG->getTargetNode(Opcode, dl, LD->getValueType(0),
+ PPCLowering.getPointerTy(),
+ MVT::Other, Ops, 3);
+ } else {
+ assert(0 && "R+R preindex loads not supported yet!");
+ }
+ }
+
+ case ISD::AND: {
+ unsigned Imm, Imm2, SH, MB, ME;
+
+ // If this is an and of a value rotated between 0 and 31 bits and then and'd
+ // with a mask, emit rlwinm
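+ // (rlwinm rotates the source left by SH and ANDs the result with the mask
+ // whose one bits run from bit MB through bit ME, counting bit 0 as the most
+ // significant bit.)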
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // If this is just a masked value where the input is not handled above, and
+ // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRunOfOnes(Imm, MB, ME) &&
+ N->getOperand(0).getOpcode() != ISD::ROTL) {
+ SDValue Val = N->getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+ // AND X, 0 -> 0, not "rlwinm 32".
+ if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
+ ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ return NULL;
+ }
+ // ISD::OR doesn't get all the bitfield insertion fun.
+ // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ N->getOperand(0).getOpcode() == ISD::OR &&
+ isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
+ unsigned MB, ME;
+ Imm = ~(Imm^Imm2);
+ if (isRunOfOnes(Imm, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ N->getOperand(0).getOperand(1),
+ getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
+ return CurDAG->getTargetNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::OR:
+ if (N->getValueType(0) == MVT::i32)
+ if (SDNode *I = SelectBitfieldInsert(N))
+ return I;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SHL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SRL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SELECT_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
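+ // The addic/subfe pair below computes (lhs != 0) without using a condition
+ // register: addic lhs, -1 sets the carry exactly when lhs is nonzero, and
+ // subfe of (lhs-1) from lhs reduces to that carry, yielding 0 or 1.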
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (N1C->isNullValue() && N3C->isNullValue() &&
+ N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+ // FIXME: Implement this optzn for PPC64.
+ N->getValueType(0) == MVT::i32) {
+ SDNode *Tmp =
+ CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag,
+ N->getOperand(0), getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+ SDValue(Tmp, 0), N->getOperand(0),
+ SDValue(Tmp, 1));
+ }
+
+ SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+ unsigned BROpc = getPredicateForSetCC(CC);
+
+ unsigned SelectCCOp;
+ if (N->getValueType(0) == MVT::i32)
+ SelectCCOp = PPC::SELECT_CC_I4;
+ else if (N->getValueType(0) == MVT::i64)
+ SelectCCOp = PPC::SELECT_CC_I8;
+ else if (N->getValueType(0) == MVT::f32)
+ SelectCCOp = PPC::SELECT_CC_F4;
+ else if (N->getValueType(0) == MVT::f64)
+ SelectCCOp = PPC::SELECT_CC_F8;
+ else
+ SelectCCOp = PPC::SELECT_CC_VRRC;
+
+ SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
+ getI32Imm(BROpc) };
+ return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
+ }
+ case PPCISD::COND_BRANCH: {
+ // Op #0 is the Chain.
+ // Op #1 is the PPC::PRED_* number.
+ // Op #2 is the CR#
+ // Op #3 is the Dest MBB
+ // Op #4 is the Flag.
+ // Prevent PPC::PRED_* from being selected into LI.
+ SDValue Pred =
+ getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+ SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
+ N->getOperand(0), N->getOperand(4) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5);
+ }
+ case ISD::BR_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
+ SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode,
+ N->getOperand(4), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
+ }
+ case ISD::BRIND: {
+ // FIXME: Should custom lower this.
+ SDValue Chain = N->getOperand(0);
+ SDValue Target = N->getOperand(1);
+ unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
+ Chain = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Target,
+ Chain), 0);
+ return CurDAG->SelectNodeTo(N, PPC::BCTR, MVT::Other, Chain);
+ }
+ case ISD::DECLARE: {
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
+
+ // FIXME: We need to handle this for VLAs.
+ if (!FINode) {
+ ReplaceUses(Op.getValue(0), Chain);
+ return NULL;
+ }
+
+ if (N2.getOpcode() == ISD::ADD) {
+ if (N2.getOperand(0).getOpcode() == ISD::ADD &&
+ N2.getOperand(0).getOperand(0).getOpcode() == PPCISD::GlobalBaseReg &&
+ N2.getOperand(0).getOperand(1).getOpcode() == PPCISD::Hi &&
+ N2.getOperand(1).getOpcode() == PPCISD::Lo)
+ N2 = N2.getOperand(0).getOperand(1).getOperand(0);
+ else if (N2.getOperand(0).getOpcode() == ISD::ADD &&
+ N2.getOperand(0).getOperand(0).getOpcode() == PPCISD::GlobalBaseReg &&
+ N2.getOperand(0).getOperand(1).getOpcode() == PPCISD::Lo &&
+ N2.getOperand(1).getOpcode() == PPCISD::Hi)
+ N2 = N2.getOperand(0).getOperand(1).getOperand(0);
+ else if (N2.getOperand(0).getOpcode() == PPCISD::Hi &&
+ N2.getOperand(1).getOpcode() == PPCISD::Lo)
+ N2 = N2.getOperand(0).getOperand(0);
+ }
+
+ // If we don't have a global address here, the debug info is mangled;
+ // just drop it.
+ if (!isa<GlobalAddressSDNode>(N2)) {
+ ReplaceUses(Op.getValue(0), Chain);
+ return NULL;
+ }
+ int FI = cast<FrameIndexSDNode>(N1)->getIndex();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(N2)->getGlobal();
+ SDValue Tmp1 = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy());
+ return CurDAG->SelectNodeTo(N, TargetInstrInfo::DECLARE,
+ MVT::Other, Tmp1, Tmp2, Chain);
+ }
+ }
+
+ return SelectCode(Op);
+}
+
+
+
+/// createPPCISelDag - This pass converts a legalized DAG into a
+/// PowerPC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
+ return new PPCDAGToDAGISel(TM);
+}
+
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
new file mode 100644
index 000000000000..a7744b8f7a4e
--- /dev/null
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -0,0 +1,4878 @@
+//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCISelLowering.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPredicates.h"
+#include "PPCTargetMachine.h"
+#include "PPCPerfectShuffle.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/DerivedTypes.h"
+using namespace llvm;
+
+static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
+cl::desc("enable preincrement load/store generation on PPC (experimental)"),
+ cl::Hidden);
+
+PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
+ : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {
+
+ setPow2DivIsCheap();
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
+ addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
+ addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
+
+ // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // PowerPC has pre-inc loads and stores.
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+
+ // This is used in the ppcf128->int sequence. Note it has different semantics
+ // from FP_ROUND: that rounds to nearest, this rounds to zero.
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+
+ // PowerPC has no SREM/UREM instructions
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/fmod/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+
+ // Expand FSQRT when the subtarget has no hardware square-root instruction.
+ if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ }
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // PowerPC does not have BSWAP, CTPOP or CTTZ
+ setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+
+ // PowerPC does not have ROTR
+ setOperationAction(ISD::ROTR, MVT::i32 , Expand);
+ setOperationAction(ISD::ROTR, MVT::i64 , Expand);
+
+ // PowerPC does not have Select
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+ // PowerPC wants to turn select_cc of FP into fsel when possible.
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // PowerPC wants to optimize integer setcc a bit
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+
+ // PowerPC does not have BRCOND which requires SetCC
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+ // PowerPC does not have [U|S]INT_TO_FP
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // Support label based line numbers.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ // RET must be custom lowered, to meet ABI requirements.
+ setOperationAction(ISD::RET , MVT::Other, Custom);
+
+ // TRAP is legal.
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // TRAMPOLINE is custom lowered.
+ setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // VAARG is custom lowered with ELF 32 ABI
+ if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI())
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Comparisons that require checking two conditions.
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
+
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ // 64-bit capable subtargets also have instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+
+ // FIXME: disable this lowered code. This generates 64-bit register values,
+ // and we don't model the fact that the top part is clobbered by calls. We
+ // need to flag these together so that the value isn't live across a call.
+ //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+
+ // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
+ } else {
+ // PowerPC does not have FP_TO_UINT on 32-bit implementations.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ }
+
+ if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
+ // 64-bit PowerPC implementations can support i64 types directly
+ addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ // 64-bit PowerPC wants to expand i128 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+ } else {
+ // 32-bit PowerPC wants to expand i64 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ }
+
+ if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD , VT, Legal);
+ setOperationAction(ISD::SUB , VT, Legal);
+
+ // We promote all shuffles to v16i8.
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
+ AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
+
+ // We promote all non-typed operations to v4i32.
+ setOperationAction(ISD::AND , VT, Promote);
+ AddPromotedToType (ISD::AND , VT, MVT::v4i32);
+ setOperationAction(ISD::OR , VT, Promote);
+ AddPromotedToType (ISD::OR , VT, MVT::v4i32);
+ setOperationAction(ISD::XOR , VT, Promote);
+ AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
+ setOperationAction(ISD::LOAD , VT, Promote);
+ AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
+
+ // No other operations are legal.
+ setOperationAction(ISD::MUL , VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ }
+
+ // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
+ // with merges, splats, etc.
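+ // (vperm selects arbitrary bytes from the concatenation of its two source
+ // vectors using a control vector of byte indices, so any byte-level shuffle
+ // can be expressed with it.)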
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::AND , MVT::v4i32, Legal);
+ setOperationAction(ISD::OR , MVT::v4i32, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i32, Legal);
+ setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+
+ addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
+
+ setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ }
+
+ setShiftAmountType(MVT::i32);
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ setStackPointerRegisterToSaveRestore(PPC::X1);
+ setExceptionPointerRegister(PPC::X3);
+ setExceptionSelectorRegister(PPC::X4);
+ } else {
+ setStackPointerRegisterToSaveRestore(PPC::R1);
+ setExceptionPointerRegister(PPC::R3);
+ setExceptionSelectorRegister(PPC::R4);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::BR_CC);
+ setTargetDAGCombine(ISD::BSWAP);
+
+ // Darwin long double math library functions have $LDBL128 appended.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
+ setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
+ setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
+ setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
+ setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
+ setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
+ setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
+ setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
+ setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
+ setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
+ }
+
+ computeRegisterProperties();
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area.
+unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ TargetMachine &TM = getTargetMachine();
+ // Darwin passes everything on a 4-byte boundary.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ return 4;
+ // FIXME Elf TBD
+ return 4;
+}
+
+const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case PPCISD::FSEL: return "PPCISD::FSEL";
+ case PPCISD::FCFID: return "PPCISD::FCFID";
+ case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
+ case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::STFIWX: return "PPCISD::STFIWX";
+ case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
+ case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
+ case PPCISD::VPERM: return "PPCISD::VPERM";
+ case PPCISD::Hi: return "PPCISD::Hi";
+ case PPCISD::Lo: return "PPCISD::Lo";
+ case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
+ case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
+ case PPCISD::SRL: return "PPCISD::SRL";
+ case PPCISD::SRA: return "PPCISD::SRA";
+ case PPCISD::SHL: return "PPCISD::SHL";
+ case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
+ case PPCISD::STD_32: return "PPCISD::STD_32";
+ case PPCISD::CALL_ELF: return "PPCISD::CALL_ELF";
+ case PPCISD::CALL_Macho: return "PPCISD::CALL_Macho";
+ case PPCISD::MTCTR: return "PPCISD::MTCTR";
+ case PPCISD::BCTRL_Macho: return "PPCISD::BCTRL_Macho";
+ case PPCISD::BCTRL_ELF: return "PPCISD::BCTRL_ELF";
+ case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::MFCR: return "PPCISD::MFCR";
+ case PPCISD::VCMP: return "PPCISD::VCMP";
+ case PPCISD::VCMPo: return "PPCISD::VCMPo";
+ case PPCISD::LBRX: return "PPCISD::LBRX";
+ case PPCISD::STBRX: return "PPCISD::STBRX";
+ case PPCISD::LARX: return "PPCISD::LARX";
+ case PPCISD::STCX: return "PPCISD::STCX";
+ case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
+ case PPCISD::MFFS: return "PPCISD::MFFS";
+ case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
+ case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
+ case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
+ case PPCISD::MTFSF: return "PPCISD::MTFSF";
+ case PPCISD::TAILCALL: return "PPCISD::TAILCALL";
+ case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ }
+}
+
+
+MVT PPCTargetLowering::getSetCCResultType(MVT VT) const {
+ return MVT::i32;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Node matching predicates, for use by the tblgen matching code.
+//===----------------------------------------------------------------------===//
+
+/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isZero();
+ }
+ return false;
+}
+
+/// isConstantOrUndef - Return true if the shuffle mask element Op is undef
+/// (negative) or if it matches the specified value.
+static bool isConstantOrUndef(int Op, int Val) {
+ return Op < 0 || Op == Val;
+}
+
+/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUHUM instruction.
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
+ return false;
+ }
+ return true;
+}
+
+/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUWUM instruction.
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+ if (!isUnary) {
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
+ return false;
+ } else {
+ for (unsigned i = 0; i != 8; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
+ return false;
+ }
+ return true;
+}
+
+/// isVMerge - Common function, used to match vmrg* shuffles.
+///
+static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned LHSStart, unsigned RHSStart) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
+ assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
+ "Unsupported merge size!");
+
+ for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
+ for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
+ if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
+ LHSStart+j+i*UnitSize) ||
+ !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
+ RHSStart+j+i*UnitSize))
+ return false;
+ }
+ return true;
+}
+
+/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
+bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 8, 24);
+ return isVMerge(N, UnitSize, 8, 8);
+}
+
+/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
+bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 0, 16);
+ return isVMerge(N, UnitSize, 0, 0);
+}
+
+
+/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+/// amount, otherwise return -1.
+int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ "PPC only supports shuffles by bytes!");
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i;
+ for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
+ /*search*/;
+
+ if (i == 16) return -1; // all undef.
+
+ // Otherwise, check to see if the rest of the elements are consecutively
+ // numbered from this value.
+ unsigned ShiftAmt = SVOp->getMaskElt(i);
+ if (ShiftAmt < i) return -1;
+ ShiftAmt -= i;
+
+ if (!isUnary) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+ } else {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+ return -1;
+ }
+ return ShiftAmt;
+}
+
+/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of a single element that is suitable for input to
+/// VSPLTB/VSPLTH/VSPLTW.
+bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ (EltSize == 1 || EltSize == 2 || EltSize == 4));
+
+ // This is a splat operation if each element of the permute is the same, and
+ // if the value doesn't reference the second vector.
+ unsigned ElementBase = N->getMaskElt(0);
+
+ // FIXME: Handle UNDEF elements too!
+ if (ElementBase >= 16)
+ return false;
+
+ // Check that the indices are consecutive, in the case of a multi-byte element
+ // splatted with a v16i8 mask.
+ for (unsigned i = 1; i != EltSize; ++i)
+ if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
+ return false;
+
+ for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
+ if (N->getMaskElt(i) < 0) continue;
+ for (unsigned j = 0; j != EltSize; ++j)
+ if (N->getMaskElt(i+j) != N->getMaskElt(j))
+ return false;
+ }
+ return true;
+}
+
+/// isAllNegativeZeroVector - Returns true if all elements of build_vector
+/// are -0.0.
+bool PPC::isAllNegativeZeroVector(SDNode *N) {
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
+
+ APInt APVal, APUndef;
+ unsigned BitSize;
+ bool HasAnyUndefs;
+
+ if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32))
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ return CFP->getValueAPF().isNegZero();
+
+ return false;
+}
+
+/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ assert(isSplatShuffleMask(SVOp, EltSize));
+ return SVOp->getMaskElt(0) / EltSize;
+}
+
+/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
+/// by using a vspltis[bhw] instruction of the specified element size, return
+/// the constant being splatted. The ByteSize field indicates the number of
+/// bytes of each element [124] -> [bhw].
+SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+ SDValue OpVal(0, 0);
+
+ // If ByteSize of the splat is bigger than the element size of the
+ // build_vector, then we have a case where we are checking for a splat where
+ // multiple elements of the buildvector are folded together into a single
+ // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
+ unsigned EltSize = 16/N->getNumOperands();
+ if (EltSize < ByteSize) {
+ unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
+ SDValue UniquedVals[4];
+ assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
+
+ // See if all of the elements in the buildvector agree across chunks.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ // If the element isn't a constant, bail fully out.
+ if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
+
+
+ if (UniquedVals[i&(Multiple-1)].getNode() == 0)
+ UniquedVals[i&(Multiple-1)] = N->getOperand(i);
+ else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
+ return SDValue(); // no match.
+ }
+
+ // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
+ // either constant or undef values that are identical for each chunk. See
+ // if these chunks can form into a larger vspltis*.
+
+ // Check to see if all of the leading entries are either 0 or -1. If
+ // neither, then this won't fit into the immediate field.
+ bool LeadingZero = true;
+ bool LeadingOnes = true;
+ for (unsigned i = 0; i != Multiple-1; ++i) {
+ if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs.
+
+ LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
+ LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
+ }
+ // Finally, check the least significant entry.
+ if (LeadingZero) {
+ if (UniquedVals[Multiple-1].getNode() == 0)
+ return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
+ int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
+ if (Val < 16)
+ return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
+ }
+ if (LeadingOnes) {
+ if (UniquedVals[Multiple-1].getNode() == 0)
+ return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
+ int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
+ if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
+ return DAG.getTargetConstant(Val, MVT::i32);
+ }
+
+ return SDValue();
+ }
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return SDValue();
+ }
+
+ if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
+
+ unsigned ValSizeInBytes = EltSize;
+ uint64_t Value = 0;
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ Value = CN->getZExtValue();
+ } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+ assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
+ Value = FloatToBits(CN->getValueAPF().convertToFloat());
+ }
+
+ // If the splat value is larger than the element value, then we can never do
+ // this splat. The only replicated value whose bits could fit in the
+ // immediate field is zero, and we prefer to use vxor for that.
+ if (ValSizeInBytes < ByteSize) return SDValue();
+
+ // If the element value is larger than the splat value, cut it in half and
+ // check to see if the two halves are equal. Continue doing this until we
+ // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
+ while (ValSizeInBytes > ByteSize) {
+ ValSizeInBytes >>= 1;
+
+ // If the top half equals the bottom half, we're still ok.
+ if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
+ (Value & ((1 << (8*ValSizeInBytes))-1)))
+ return SDValue();
+ }
+
+ // Properly sign extend the value.
+ int ShAmt = (4-ByteSize)*8;
+ int MaskVal = ((int)Value << ShAmt) >> ShAmt;
+
+ // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
+ if (MaskVal == 0) return SDValue();
+
+ // Finally, if this value fits in a 5 bit sext field, return it
+ if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
+ return DAG.getTargetConstant(MaskVal, MVT::i32);
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing Mode Selection
+//===----------------------------------------------------------------------===//
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// SelectAddressRegReg - Given the specified address, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
+bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ short imm = 0;
+ if (N.getOpcode() == ISD::ADD) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i
+ if (N.getOperand(1).getOpcode() == PPCISD::Lo)
+ return false; // r+i
+
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ } else if (N.getOpcode() == ISD::OR) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i can fold it if we can.
+
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are provably
+ // disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ APInt RHSKnownZero, RHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+
+ if (LHSKnownZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N.getOperand(1),
+ APInt::getAllOnesValue(N.getOperand(1)
+ .getValueSizeInBits()),
+ RHSKnownZero, RHSKnownOne);
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ if (~(LHSKnownZero | RHSKnownZero) == 0) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 16-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
+bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // FIXME dl should come from parent load or store, not from address
+ DebugLoc dl = N.getDebugLoc();
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm)) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+
+ // If this address fits entirely in a 16-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
+ Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
+ return true;
+ }
+
+ // Handle 32-bit sext immediates with LIS + addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
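+ // The memory instruction sign-extends its 16-bit displacement, so the
+ // high half materialized by LIS must compensate:
+ // Base = (Addr - sext(lo16(Addr))) >> 16.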
+ Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
+
+ Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base), 0);
+ return true;
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// SelectAddressRegRegOnly - Given the specified address, force it to be
+/// represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ // Check to see if we can easily represent this as an [r+r] address. This
+ // will fail if it thinks that the address is more profitably represented as
+ // reg+imm, e.g. where imm = 0.
+ if (SelectAddressRegReg(N, Base, Index, DAG))
+ return true;
+
+ // If the operand is an addition, always emit this as [r+r], since this is
+ // better (for code size, and execution, as the memop does the add for free)
+ // than emitting an explicit add.
+ if (N.getOpcode() == ISD::ADD) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+
+ // Otherwise, do it the hard way, using R0 as the base register.
+ Base = DAG.getRegister(PPC::R0, N.getValueType());
+ Index = N;
+ return true;
+}
+
+/// SelectAddressRegImmShift - Returns true if the address N can be
+/// represented by a base register plus a signed 14-bit displacement
+/// [r+imm*4]. Suitable for use by STD and friends.
+bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG) const {
+ // FIXME dl should come from the parent load or store, not the address
+ DebugLoc dl = N.getDebugLoc();
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ Base = N.getOperand(0);
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address. Verify low two bits are clear.
+ if ((CN->getZExtValue() & 3) == 0) {
+ // If this address fits entirely in a 14-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm)) {
+ Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
+ Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
+ return true;
+ }
+
+ // Fold the low-part of 32-bit absolute addresses into addr mode.
+ if (CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
+ Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base),0);
+ return true;
+ }
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
+
+
+/// getPreIndexedAddressParts - Returns true if the node's address can be
+/// legally represented as a pre-indexed load / store address, and sets the
+/// base pointer, offset pointer, and addressing mode by reference.
+bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ // Disabled by default for now.
+ if (!EnablePPCPreinc) return false;
+
+ SDValue Ptr;
+ MVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ ST = ST;
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ } else
+ return false;
+
+ // PowerPC doesn't have preinc load/store instructions for vectors.
+ if (VT.isVector())
+ return false;
+
+ // TODO: Check reg+reg first.
+
+ // LDU/STU use reg+imm*4, others use reg+imm.
+ if (VT != MVT::i64) {
+ // reg + imm
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
+ return false;
+ } else {
+ // reg + imm * 4.
+ if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
+ return false;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
+ // sext i32 to i64 when addr mode is r+i.
+ if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
+ LD->getExtensionType() == ISD::SEXTLOAD &&
+ isa<ConstantSDNode>(Offset))
+ return false;
+ }
+
+ AM = ISD::PRE_INC;
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) {
+ MVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ Constant *C = CP->getConstVal();
+ SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ const TargetMachine &TM = DAG.getTarget();
+
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, CPI, Zero);
+ SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, CPI, Zero);
+
+ // If this is a non-darwin platform, we don't support non-static relo models
+ // yet.
+ if (TM.getRelocationModel() == Reloc::Static ||
+ !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ // Generate non-pic code that has direct accesses to the constant pool.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ }
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(), PtrVT), Hi);
+ }
+
+ Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ return Lo;
+}
+
+SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+ MVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there isn't really any debug loc here
+ DebugLoc dl = Op.getDebugLoc();
+
+ const TargetMachine &TM = DAG.getTarget();
+
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, JTI, Zero);
+ SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, JTI, Zero);
+
+ // If this is a non-darwin platform, we don't support non-static relo models
+ // yet.
+ if (TM.getRelocationModel() == Reloc::Static ||
+ !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ // Generate non-pic code that has direct accesses to the constant pool.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ }
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(), PtrVT), Hi);
+ }
+
+ Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ return Lo;
+}
+
+SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) {
+ assert(0 && "TLS not implemented for PPC.");
+ return SDValue(); // Not reached
+}
+
+SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) {
+ MVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = GSDN->getDebugLoc();
+
+ const TargetMachine &TM = DAG.getTarget();
+
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero);
+ SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero);
+
+ // If this is a non-darwin platform, we don't support non-static relo models
+ // yet.
+ if (TM.getRelocationModel() == Reloc::Static ||
+ !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ // Generate non-pic code that has direct accesses to globals.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+ }
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(), PtrVT), Hi);
+ }
+
+ Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
+
+ if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
+ return Lo;
+
+ // If the global is weak or external, we have to go through the lazy
+ // resolution stub.
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0);
+}
+
+SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If we're comparing for equality to zero, expose the fact that this is
+ // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
+ // fold the new nodes.
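+ // (ctlz of zero returns the operand's bit width, so shifting the count
+ // right by log2(width) yields 1 exactly when the operand is zero.)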
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (C->isNullValue() && CC == ISD::SETEQ) {
+ MVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+ }
+ // Leave comparisons against 0 and -1 alone for now, since they're usually
+ // optimized. FIXME: revisit this when we can custom lower all setcc
+ // optimizations.
+ if (C->isAllOnesValue() || C->isNullValue())
+ return SDValue();
+ }
+
+ // If we have an integer seteq/setne, turn it into a compare against zero
+ // by xor'ing the rhs with the lhs, which is faster than setting a
+ // condition register, reading it back out, and masking the correct bit. The
+ // normal approach here uses sub to do this instead of xor. Using xor exposes
+ // the result to other bit-twiddling opportunities.
+ MVT LHSVT = Op.getOperand(0).getValueType();
+ if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ MVT VT = Op.getValueType();
+ SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
+ }
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ int VarArgsFrameIndex,
+ int VarArgsStackOffset,
+ unsigned VarArgsNumGPR,
+ unsigned VarArgsNumFPR,
+ const PPCSubtarget &Subtarget) {
+
+ assert(0 && "VAARG in ELF32 ABI not implemented yet!");
+ return SDValue(); // Not reached
+}
+
+SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ DebugLoc dl = Op.getDebugLoc();
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+ const Type *IntPtrTy =
+ DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = Trmp; Args.push_back(Entry);
+
+ // TrampSize == (isPPC64 ? 48 : 40);
+ Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ Args.push_back(Entry);
+
+ Entry.Node = FPtr; Args.push_back(Entry);
+ Entry.Node = Nest; Args.push_back(Entry);
+
+ // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(Chain, Op.getValueType().getTypeForMVT(), false, false,
+ false, false, CallingConv::C, false,
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+ Args, DAG, dl);
+
+ SDValue Ops[] =
+ { CallResult.first, CallResult.second };
+
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ int VarArgsFrameIndex,
+ int VarArgsStackOffset,
+ unsigned VarArgsNumGPR,
+ unsigned VarArgsNumFPR,
+ const PPCSubtarget &Subtarget) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Subtarget.isMachoABI()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+ }
+
+ // For ELF 32 ABI we follow the layout of the va_list struct.
+ // We suppose the given va_list is already allocated.
+ //
+ // typedef struct {
+ // char gpr; /* index into the array of 8 GPRs
+ // * stored in the register save area
+ // * gpr=0 corresponds to r3,
+ // * gpr=1 to r4, etc.
+ // */
+ // char fpr; /* index into the array of 8 FPRs
+ // * stored in the register save area
+ // * fpr=0 corresponds to f1,
+ // * fpr=1 to f2, etc.
+ // */
+ // char *overflow_arg_area;
+ // /* location on stack that holds
+ // * the next overflow argument
+ // */
+ // char *reg_save_area;
+ // /* where r3:r10 and f1:f8 (if saved)
+ // * are stored
+ // */
+ // } va_list[1];
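+ // With 32-bit pointers the fields therefore live at byte offsets 0 (gpr),
+ // 1 (fpr), 4 (overflow_arg_area) and 8 (reg_save_area); the offset
+ // constants computed below are the increments between successive stores.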
+
+
+ SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8);
+ SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8);
+
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
+ SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+
+ uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
+ SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
+
+ uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
+ SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
+
+ uint64_t FPROffset = 1;
+ SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
+
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // Store first byte : number of int regs
+ SDValue firstStore = DAG.getStore(Op.getOperand(0), dl, ArgGPR,
+ Op.getOperand(1), SV, 0);
+ uint64_t nextOffset = FPROffset;
+ SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
+ ConstFPROffset);
+
+ // Store second byte : number of float regs
+ SDValue secondStore =
+ DAG.getStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset);
+ nextOffset += StackOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
+
+ // Store second word : arguments given on stack
+ SDValue thirdStore =
+ DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset);
+ nextOffset += FrameOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
+
+ // Store third word : arguments given in registers
+ return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset);
+
+}
+
+#include "PPCGenCallingConv.inc"
+
+/// GetFPR - Get the set of FP registers that should be allocated for arguments,
+/// depending on which subtarget is selected.
+static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
+ if (Subtarget.isMachoABI()) {
+ static const unsigned FPR[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
+ };
+ return FPR;
+ }
+
+
+ static const unsigned FPR[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+ return FPR;
+}
+
+/// CalculateStackSlotSize - Calculates the size reserved for this argument on
+/// the stack.
+static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags,
+ bool isVarArg, unsigned PtrByteSize) {
+ MVT ArgVT = Arg.getValueType();
+ unsigned ArgSize = ArgVT.getSizeInBits()/8;
+ if (Flags.isByVal())
+ ArgSize = Flags.getByValSize();
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
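+ // e.g. a 6-byte byval argument on a 32-bit target reserves 8 bytes.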
+
+ return ArgSize;
+}
+
+SDValue
+PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
+ SelectionDAG &DAG,
+ int &VarArgsFrameIndex,
+ int &VarArgsStackOffset,
+ unsigned &VarArgsNumGPR,
+ unsigned &VarArgsNumFPR,
+ const PPCSubtarget &Subtarget) {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SmallVector<SDValue, 8> ArgValues;
+ SDValue Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ DebugLoc dl = Op.getDebugLoc();
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ bool isMachoABI = Subtarget.isMachoABI();
+ bool isELF32_ABI = Subtarget.isELF32_ABI();
+ // Potential tail calls could cause overwriting of argument stack slots.
+ unsigned CC = MF.getFunction()->getCallingConv();
+ bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea = ArgOffset;
+
+ static const unsigned GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const unsigned GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+
+ static const unsigned *FPR = GetFPR(Subtarget);
+
+ static const unsigned VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+
+ const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
+ const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8;
+ const unsigned Num_VR_Regs = array_lengthof( VR);
+
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ // In 32-bit non-varargs functions, the stack space for vectors is after the
+ // stack space for non-vectors. We do not use this space unless we have
+ // too many vectors to fit in registers, something that only occurs in
+ // constructed examples:), but we have to walk the arglist to figure
+ // that out...for the pathological case, compute VecArgOffset as the
+ // start of the vector parameter area. Computing VecArgOffset is the
+ // entire point of the following loop.
+ // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying
+ // to handle Elf here.
+ unsigned VecArgOffset = ArgOffset;
+ if (!isVarArg && !isPPC64) {
+ for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e;
+ ++ArgNo) {
+ MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ ISD::ArgFlagsTy Flags =
+ cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
+
+ if (Flags.isByVal()) {
+ // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the pointer size.
+ ObjSize = Flags.getByValSize();
+ unsigned ArgSize =
+ ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ VecArgOffset += ArgSize;
+ continue;
+ }
+
+ switch(ObjectVT.getSimpleVT()) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i32:
+ case MVT::f32:
+ VecArgOffset += isPPC64 ? 8 : 4;
+ break;
+ case MVT::i64: // PPC64
+ case MVT::f64:
+ VecArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Nothing to do, we're only looking at Nonvector args here.
+ break;
+ }
+ }
+ }
+ // We've found where the vector parameter area in memory is. Skip the
+ // first 12 vector parameters; they are passed in registers V2..V13 and
+ // don't use that memory.
+ VecArgOffset = ((VecArgOffset+15)/16)*16;
+ VecArgOffset += 12*16;
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+ //
+ // In the ELF 32 ABI, GPRs and the stack are double-word aligned: an argument
+ // represented with two words (long long or double) must be copied to an
+ // even GPR_idx value or to an even ArgOffset value.
+
+ SmallVector<SDValue, 8> MemOps;
+ unsigned nAltivecParamsAtEnd = 0;
+ for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
+ ArgNo != e; ++ArgNo) {
+ SDValue ArgVal;
+ bool needsLoad = false;
+ MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ unsigned ArgSize = ObjSize;
+ ISD::ArgFlagsTy Flags =
+ cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
+ // See if next argument requires stack alignment in ELF
+ bool Align = Flags.isSplit();
+
+ unsigned CurArgOffset = ArgOffset;
+
+ // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ if (isVarArg || isPPC64) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
+ Flags,
+ isVarArg,
+ PtrByteSize);
+ } else nAltivecParamsAtEnd++;
+ } else
+ // Calculate min reserved area.
+ MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
+ Flags,
+ isVarArg,
+ PtrByteSize);
+
+ // FIXME alignment for ELF may not be right
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the pointer size.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // Double word align in ELF
+ if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
+ // Objects of size 1 and 2 are right justified, everything else is
+ // left justified. This means the memory address is adjusted forwards.
+ if (ObjSize==1 || ObjSize==2) {
+ CurArgOffset = CurArgOffset + (4 - ObjSize);
+ }
+ // The value of the object is its address.
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgValues.push_back(FIN);
+ if (ObjSize==1 || ObjSize==2) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+ SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 );
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ if (isMachoABI) ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ // Store whatever pieces of the object are in registers
+ // to memory. ArgVal will be address of the beginning of
+ // the object.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ if (isMachoABI) ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT()) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i32:
+ if (!isPPC64) {
+ // Double word align in ELF
+ if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
+
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // Stack align in ELF
+ if (needsLoad && Align && isELF32_ABI)
+ ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+ // All int arguments reserve stack space in Macho ABI.
+ if (isMachoABI || needsLoad) ArgOffset += PtrByteSize;
+ break;
+ }
+ // FALLTHROUGH
+ case MVT::i64: // PPC64
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32) {
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ if (Flags.isSExt())
+ ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+ else if (Flags.isZExt())
+ ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ }
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in Macho ABI.
+ if (isMachoABI || needsLoad) ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // Every 4 bytes of argument space consumes one of the GPRs available for
+ // argument passing.
+ if (GPR_idx != Num_GPR_Regs && isMachoABI) {
+ ++GPR_idx;
+ if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
+ ++GPR_idx;
+ }
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+ if (ObjectVT == MVT::f32)
+ VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass);
+ else
+ VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+ RegInfo.addLiveIn(FPR[FPR_idx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else {
+ needsLoad = true;
+ }
+
+ // Stack align in ELF
+ if (needsLoad && Align && isELF32_ABI)
+ ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+ // All FP arguments reserve stack space in Macho ABI.
+ if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Note that vector arguments in registers don't reserve stack space,
+ // except in varargs functions.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass);
+ RegInfo.addLiveIn(VR[VR_idx], VReg);
+ ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
+ if (isVarArg) {
+ while ((ArgOffset % 16) != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != Num_GPR_Regs)
+ GPR_idx++;
+ }
+ ArgOffset += 16;
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs);
+ }
+ ++VR_idx;
+ } else {
+ if (!isVarArg && !isPPC64) {
+ // Vectors go after all the nonvectors.
+ CurArgOffset = VecArgOffset;
+ VecArgOffset += 16;
+ } else {
+ // Vectors are aligned.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ CurArgOffset = ArgOffset;
+ ArgOffset += 16;
+ }
+ needsLoad = true;
+ }
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined above
+ // that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+ }
+
+ ArgValues.push_back(ArgVal);
+ }
+
+ // Set the size that is at least reserved in the caller of this function. A
+ // tail call optimized function's reserved stack space needs to be aligned so
+ // that taking the difference between two stack areas will result in an
+ // aligned stack.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ // Add the Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+ FI->setMinReservedArea(MinReservedArea);
+
+ // If the function takes a variable number of arguments, make a frame index
+ // for the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+
+ int depth;
+ if (isELF32_ABI) {
+ VarArgsNumGPR = GPR_idx;
+ VarArgsNumFPR = FPR_idx;
+
+ // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame
+ // pointer.
+ depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 +
+ Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 +
+ PtrVT.getSizeInBits()/8);
+
+ VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ ArgOffset);
+
+ }
+ else
+ depth = ArgOffset;
+
+ VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ depth);
+ SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+
+ // In ELF 32 ABI, the fixed integer arguments of a variadic function are
+ // stored to the VarArgsFrameIndex on the stack.
+ if (isELF32_ABI) {
+ for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) {
+ SDValue Val = DAG.getRegister(GPR[GPR_idx], PtrVT);
+ SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by dereferencing the
+ // result of va_next.
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ else
+ VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by the pointer size for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+
+ // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex
+ // on the stack.
+ if (isELF32_ABI) {
+ for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) {
+ SDValue Val = DAG.getRegister(FPR[FPR_idx], MVT::f64);
+ SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+
+ for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) {
+ unsigned VReg;
+ VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+ RegInfo.addLiveIn(FPR[FPR_idx], VReg);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+ }
+
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size());
+}
+
+ /// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
+ /// linkage area.
+static unsigned
+CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
+ bool isPPC64,
+ bool isMachoABI,
+ bool isVarArg,
+ unsigned CC,
+ CallSDNode *TheCall,
+ unsigned &nAltivecParamsAtEnd) {
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+ unsigned NumOps = TheCall->getNumArgs();
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = TheCall->getArg(i);
+ ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ MVT ArgVT = Arg.getValueType();
+ // Varargs Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
+ ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(Arg, Flags, isVarArg, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes,
+ PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+
+ // Tail call needs the stack to be aligned.
+ if (CC==CallingConv::Fast && PerformTailCallOpt) {
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ }
+
+ return NumBytes;
+}
+
+ /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
+ /// adjusted to accommodate the arguments for the tail call.
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
+ unsigned ParamSize) {
+
+ if (!IsTailCall) return 0;
+
+ PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
+ unsigned CallerMinReservedArea = FI->getMinReservedArea();
+ int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
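+ // A negative SPDiff means the callee needs more argument space than the
+ // caller has reserved; the delta is recorded so the stack can be adjusted
+ // accordingly when the tail call is emitted.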
+ // Remember only if the new adjustment is bigger.
+ if (SPDiff < FI->getTailCallSPDelta())
+ FI->setTailCallSPDelta(SPDiff);
+
+ return SPDiff;
+}
+
+ /// IsEligibleForTailCallOptimization - Check to see whether the next instruction
+ /// following the call is a return. A function is eligible if caller/callee
+ /// calling conventions match, currently only fastcc supports tail calls, and
+ /// the function CALL is immediately followed by a RET.
+bool
+PPCTargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
+ SDValue Ret,
+ SelectionDAG& DAG) const {
+ // Variable argument functions are not supported.
+ if (!PerformTailCallOpt || TheCall->isVarArg())
+ return false;
+
+ if (CheckTailCallReturnConstraints(TheCall, Ret)) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned CallerCC = MF.getFunction()->getCallingConv();
+ unsigned CalleeCC = TheCall->getCallingConv();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ // Functions containing by val parameters are not supported.
+ for (unsigned i = 0; i != TheCall->getNumArgs(); i++) {
+ ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ if (Flags.isByVal()) return false;
+ }
+
+ SDValue Callee = TheCall->getCallee();
+ // Non PIC/GOT tail calls are supported.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return true;
+
+ // At the moment we can only do local tail calls (in same module, hidden
+ // or protected) if we are generating PIC.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+ }
+
+ return false;
+}
+
+ /// isBLACompatibleAddress - Return the immediate to use if the specified
+ /// 32-bit value is representable in the immediate field of a BxA instruction.
+static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return 0;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ (Addr << 6 >> 6) != Addr)
+ return 0; // Top 6 bits have to be sext of immediate.
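+ // (The absolute BLA form takes a 26-bit, sign-extended, word-aligned target;
+ // the instruction encodes that target shifted right by two, which is the
+ // constant returned below.)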
+
+ return DAG.getConstant((int)C->getZExtValue() >> 2,
+ DAG.getTargetLoweringInfo().getPointerTy()).getNode();
+}
+
+namespace {
+
+struct TailCallArgumentInfo {
+ SDValue Arg;
+ SDValue FrameIdxOp;
+ int FrameIdx;
+
+ TailCallArgumentInfo() : FrameIdx(0) {}
+};
+
+}
+
+/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
+static void
+StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
+ SDValue Chain,
+ const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
+ SmallVector<SDValue, 8> &MemOpChains,
+ DebugLoc dl) {
+ for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
+ SDValue Arg = TailCallArgs[i].Arg;
+ SDValue FIN = TailCallArgs[i].FrameIdxOp;
+ int FI = TailCallArgs[i].FrameIdx;
+ // Store relative to the frame pointer.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
+ PseudoSourceValue::getFixedStack(FI),
+ 0));
+ }
+}
+
+/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
+/// the appropriate stack slot for the tail call optimized function call.
+static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
+ MachineFunction &MF,
+ SDValue Chain,
+ SDValue OldRetAddr,
+ SDValue OldFP,
+ int SPDiff,
+ bool isPPC64,
+ bool isMachoABI,
+ DebugLoc dl) {
+ if (SPDiff) {
+ // Calculate the new stack slot for the return address.
+ int SlotSize = isPPC64 ? 8 : 4;
+ int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
+ isMachoABI);
+ int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+ NewRetAddrLoc);
+ int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
+ isMachoABI);
+ int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);
+
+ MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
+ Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
+ PseudoSourceValue::getFixedStack(NewRetAddr), 0);
+ SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
+ Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
+ PseudoSourceValue::getFixedStack(NewFPIdx), 0);
+ }
+ return Chain;
+}
+
+/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
+/// the position of the argument.
+static void
+CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
+ SDValue Arg, int SPDiff, unsigned ArgOffset,
+ SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ int Offset = ArgOffset + SPDiff;
+ uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue FIN = DAG.getFrameIndex(FI, VT);
+ TailCallArgumentInfo Info;
+ Info.Arg = Arg;
+ Info.FrameIdxOp = FIN;
+ Info.FrameIdx = FI;
+ TailCallArguments.push_back(Info);
+}
+
+ /// EmitTailCallLoadFPAndRetAddr - Emit loads of the return address and frame
+ /// pointer from their stack slots. Returns the chain as the result and the
+ /// loaded values in LROpOut/FPOpOut. Used when tail calling.
+SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ DebugLoc dl) {
+ if (SPDiff) {
+ // Load the LR and FP stack slot for later adjusting.
+ MVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ LROpOut = getReturnAddrFrameIndex(DAG);
+ LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0);
+ Chain = SDValue(LROpOut.getNode(), 1);
+ FPOpOut = getFramePointerFrameIndex(DAG);
+ FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0);
+ Chain = SDValue(FPOpOut.getNode(), 1);
+ }
+ return Chain;
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ unsigned Size, DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ false, NULL, 0, NULL, 0);
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
+/// tail calls.
+static void
+LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
+ SDValue Arg, SDValue PtrOff, int SPDiff,
+ unsigned ArgOffset, bool isPPC64, bool isTailCall,
+ bool isVector, SmallVector<SDValue, 8> &MemOpChains,
+ SmallVector<TailCallArgumentInfo, 8>& TailCallArguments,
+ DebugLoc dl) {
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ if (!isTailCall) {
+ if (isVector) {
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ }
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
+ // Calculate and remember argument location.
+ } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
+ TailCallArguments);
+}
+
+SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget,
+ TargetMachine &TM) {
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ SDValue Chain = TheCall->getChain();
+ bool isVarArg = TheCall->isVarArg();
+ unsigned CC = TheCall->getCallingConv();
+ bool isTailCall = TheCall->isTailCall()
+ && CC == CallingConv::Fast && PerformTailCallOpt;
+ SDValue Callee = TheCall->getCallee();
+ unsigned NumOps = TheCall->getNumArgs();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ bool isMachoABI = Subtarget.isMachoABI();
+ bool isELF32_ABI = Subtarget.isELF32_ABI();
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
+ // SelectExpr to use to put the arguments in the appropriate registers.
+ std::vector<SDValue> args_to_use;
+
+ // Mark this function as potentially containing a tail call. As a consequence
+ // the frame pointer will be used for dynamic stack allocation and for
+ // restoring the caller's stack pointer in this function's epilog. This is
+ // done because, by tail calling, the called function might overwrite the
+ // value in this function's (MF) stack pointer stack slot 0(SP).
+ if (PerformTailCallOpt && CC==CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ unsigned nAltivecParamsAtEnd = 0;
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned NumBytes =
+ CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC,
+ TheCall, nAltivecParamsAtEnd);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so they can be moved somewhere
+ // else later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point arguments
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
+ unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const unsigned GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const unsigned GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const unsigned *FPR = GetFPR(Subtarget);
+
+ static const unsigned VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ const unsigned NumGPRs = array_lengthof(GPR_32);
+ const unsigned NumFPRs = isMachoABI ? 13 : 8;
+ const unsigned NumVRs = array_lengthof( VR);
+
+ const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ std::vector<std::pair<unsigned, SDValue> > RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+ SmallVector<SDValue, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ bool inMem = false;
+ SDValue Arg = TheCall->getArg(i);
+ ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ // See if next argument requires stack alignment in ELF
+ bool Align = Flags.isSplit();
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff;
+
+ // Stack align in ELF 32
+ if (isELF32_ABI && Align)
+ PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
+ StackPtr.getValueType());
+ else
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ // On PPC64, promote integers to 64-bit values.
+ if (isPPC64 && Arg.getValueType() == MVT::i32) {
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+ }
+
+ // FIXME Elf untested, what are alignment rules?
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ if (Flags.isByVal()) {
+ unsigned Size = Flags.getByValSize();
+ if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
+ if (Size==1 || Size==2) {
+ // Very small objects are passed right-justified.
+ // Everything else is passed left-justified.
+ MVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ NULL, 0, VT);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ if (isMachoABI)
+ ArgOffset += PtrByteSize;
+ } else {
+ SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, Size, dl);
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, Size, dl);
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+ // And copy the pieces of it that fit into registers.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ if (isMachoABI)
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (Arg.getValueType().getSimpleVT()) {
+ default: assert(0 && "Unexpected ValueType for argument!");
+ case MVT::i32:
+ case MVT::i64:
+ // Double word align in ELF
+ if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
+ if (GPR_idx != NumGPRs) {
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ inMem = true;
+ }
+ if (inMem || isMachoABI) {
+ // Stack align in ELF
+ if (isELF32_ABI && Align)
+ ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+
+ ArgOffset += PtrByteSize;
+ }
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ if (isVarArg) {
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
+ MemOpChains.push_back(Store);
+
+ // Float varargs are always shadowed in available integer registers
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
+ Load));
+ }
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
+ Load));
+ }
+ } else {
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+ // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
+ // GPRs.
+ if (isMachoABI) {
+ if (GPR_idx != NumGPRs)
+ ++GPR_idx;
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
+ !isPPC64) // PPC64 has 64-bit GPR's obviously :)
+ ++GPR_idx;
+ }
+ }
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ inMem = true;
+ }
+ if (inMem || isMachoABI) {
+ // Stack align in ELF
+ if (isELF32_ABI && Align)
+ ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
+ if (isPPC64)
+ ArgOffset += 8;
+ else
+ ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
+ }
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (isVarArg) {
+ // These go aligned on the stack, or in the corresponding R registers
+ // when within range. The Darwin PPC ABI doc claims they also go in
+ // V registers; in fact gcc does this only for arguments that are
+ // prototyped, not for those that match the ... We do it for all
+ // arguments, seems to work.
+ while (ArgOffset % 16 !=0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != NumGPRs)
+ GPR_idx++;
+ }
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params generally go in registers, but have
+ // stack space allocated at the end.
+ if (VR_idx != NumVRs) {
+ // Doesn't have GPR space allocated.
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ } else if (nAltivecParamsAtEnd==0) {
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ break;
+ }
+ }
+ // If all Altivec parameters fit in registers, as they usually do,
+ // they get stack space following the non-Altivec parameters. We
+ // don't track this here because nobody below needs it.
+ // If there are more Altivec parameters than fit in registers emit
+ // the stores here.
+ if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
+ unsigned j = 0;
+ // Offset is aligned; skip 1st 12 params which go in V registers.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ ArgOffset += 12*16;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = TheCall->getArg(i);
+ MVT ArgType = Arg.getValueType();
+ if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
+ ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
+ if (++j > NumVRs) {
+ SDValue PtrOff;
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // With the ELF 32 ABI, set CR6 to true if this is a vararg call.
+ if (isVarArg && isELF32_ABI) {
+ SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0);
+ Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Emit a sequence of copyto/copyfrom virtual registers for arguments that
+ // might overwrite each other in case of tail call optimization.
+ if (isTailCall) {
+ SmallVector<SDValue, 8> MemOpChains2;
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
+ MemOpChains2, dl);
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
+ isPPC64, isMachoABI, dl);
+ }
+
+ // Emit callseq_end just before tailcall node.
+ if (isTailCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ std::vector<MVT> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = isMachoABI? PPCISD::CALL_Macho : PPCISD::CALL_ELF;
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
+ else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ else {
+ // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
+ // to do the call, we can't use PPCISD::CALL.
+ SDValue MTCTROps[] = {Chain, Callee, InFlag};
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
+ 2 + (InFlag.getNode() != 0));
+ InFlag = Chain.getValue(1);
+
+ // Copy the callee address into R12/X12 on Darwin.
+ if (isMachoABI) {
+ unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12;
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, Callee, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Other);
+ NodeTys.push_back(MVT::Flag);
+ Ops.push_back(Chain);
+ CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;
+ Callee.setNode(0);
+ // Add CTR register as callee so a bctr can be emitted later.
+ if (isTailCall)
+ Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy()));
+ }
+
+ // If this is a direct call, pass the chain and the callee.
+ if (Callee.getNode()) {
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ }
+ // If this is a tail call add stack pointer delta.
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // When performing tail call optimization the callee pops its arguments off
+ // the stack. Account for this here so these bytes can be pushed back on in
+ // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+ int BytesCalleePops =
+ (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ // Emit tail call.
+ if (isTailCall) {
+ assert(InFlag.getNode() &&
+ "Flag must be set. Depend on flag being set in LowerRET");
+ Chain = DAG.getNode(PPCISD::TAILCALL, dl,
+ TheCall->getVTList(), &Ops[0], Ops.size());
+ return SDValue(Chain.getNode(), Op.getResNo());
+ }
+
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(BytesCalleePops, true),
+ InFlag);
+ if (TheCall->getValueType(0) != MVT::Other)
+ InFlag = Chain.getValue(1);
+
+ SmallVector<SDValue, 16> ResultVals;
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ CCState CCInfo(CallerCC, isVarArg, TM, RVLocs);
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_PPC);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ MVT VT = VA.getValVT();
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyFromReg(Chain, dl,
+ VA.getLocReg(), VT, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ InFlag = Chain.getValue(2);
+ }
+
+ // If the function returns void, just return the chain.
+ if (RVLocs.empty())
+ return Chain;
+
+ // Otherwise, merge everything together with a MERGE_VALUES node.
+ ResultVals.push_back(Chain);
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size());
+ return Res.getValue(Op.getResNo());
+}
+
+SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG,
+ TargetMachine &TM) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+ CCState CCInfo(CC, isVarArg, TM, RVLocs);
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_PPC);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Chain = Op.getOperand(0);
+
+ Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL);
+ if (Chain.getOpcode() == PPCISD::TAILCALL) {
+ SDValue TailCall = Chain;
+ SDValue TargetAddress = TailCall.getOperand(1);
+ SDValue StackAdjustment = TailCall.getOperand(2);
+
+ assert(((TargetAddress.getOpcode() == ISD::Register &&
+ cast<RegisterSDNode>(TargetAddress)->getReg() == PPC::CTR) ||
+ TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
+ TargetAddress.getOpcode() == ISD::TargetGlobalAddress ||
+ isa<ConstantSDNode>(TargetAddress)) &&
+ "Expecting a global address, external symbol, absolute value or register");
+
+ assert(StackAdjustment.getOpcode() == ISD::Constant &&
+ "Expecting a const value");
+
+ SmallVector<SDValue,8> Operands;
+ Operands.push_back(Chain.getOperand(0));
+ Operands.push_back(TargetAddress);
+ Operands.push_back(StackAdjustment);
+ // Copy registers used by the call. Last operand is a flag so it is not
+ // copied.
+ for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
+ Operands.push_back(Chain.getOperand(i));
+ }
+ return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Operands[0],
+ Operands.size());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Op.getOperand(i*2+1), Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ // When we pop the dynamic allocation we need to restore the SP link.
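+ // The word at 0(SP) is the back chain to the previous frame; it is reloaded
+ // here and stored at offset 0 of the restored stack pointer so the frame
+ // chain stays intact.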
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the correct type for pointers.
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Construct the stack pointer operand.
+ bool IsPPC64 = Subtarget.isPPC64();
+ unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1;
+ SDValue StackPtr = DAG.getRegister(SP, PtrVT);
+
+ // Get the operands for the STACKRESTORE.
+ SDValue Chain = Op.getOperand(0);
+ SDValue SaveSP = Op.getOperand(1);
+
+ // Load the old link SP.
+ SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0);
+
+ // Restore the stack pointer.
+ Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
+
+ // Store the old link SP.
+ return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0);
+}
+
+
+
+SDValue
+PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsPPC64 = PPCSubTarget.isPPC64();
+ bool isMachoABI = PPCSubTarget.isMachoABI();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get the current return address save index. The users of this index are
+ // primarily the tail call and return address lowering code.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int RASI = FI->getReturnAddrSaveIndex();
+
+ // If the return address save index hasn't been defined yet.
+ if (!RASI) {
+ // Find out the fixed offset of the return address save area.
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI);
+ // Allocate the frame index for the return address save area.
+ RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset);
+ // Save the result.
+ FI->setReturnAddrSaveIndex(RASI);
+ }
+ return DAG.getFrameIndex(RASI, PtrVT);
+}
+
+SDValue
+PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsPPC64 = PPCSubTarget.isPPC64();
+ bool isMachoABI = PPCSubTarget.isMachoABI();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get current frame pointer save index. The users of this index will be
+ // primarily DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI) {
+ // Find out the fixed offset of the frame pointer save area.
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI);
+
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+ return DAG.getFrameIndex(FPSI, PtrVT);
+}
+
+SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the correct type for pointers.
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Negate the size.
+ SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
+ DAG.getConstant(0, PtrVT), Size);
+ // Construct a node for the frame pointer save index.
+ SDValue FPSIdx = getFramePointerFrameIndex(DAG);
+ // Build a DYNALLOC node.
+ SDValue Ops[3] = { Chain, NegSize, FPSIdx };
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
+ return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
+}
+
+ /// LowerSELECT_CC - Lower floating point select_cc's into the fsel instruction
+ /// when possible.
+SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ // Not FP? Not a fsel.
+ if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
+ !Op.getOperand(2).getValueType().isFloatingPoint())
+ return Op;
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ // Cannot handle SETEQ/SETNE.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
+
+ MVT ResVT = Op.getValueType();
+ MVT CmpVT = Op.getOperand(0).getValueType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
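+ // fsel selects its second operand when its first operand is >= 0.0, so each
+ // comparison below is turned into a sign test on LHS, -LHS, LHS-RHS, or
+ // RHS-LHS.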
+
+ // If the RHS of the comparison is a 0.0, we don't need to do the
+ // subtraction at all.
+ if (isFloatingPointZero(RHS))
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETLT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
+ }
+
+ SDValue Cmp;
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETULT:
+ case ISD::SETLT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ }
+ return Op;
+}
+
+// FIXME: Split this code up when LegalizeDAGTypes lands.
+SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getValueType().getSimpleVT()) {
+ default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Src);
+ break;
+ case MVT::i64:
+ Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
+ break;
+ }
+
+ // Convert the FP value to an int value through memory.
+ SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);
+
+ // Emit a store to the stack slot.
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0);
+
+ // Result is a load from the stack slot. If loading 4 bytes, make sure to
+ // add in a bias of 4: the integer result lives in the low-order word of the
+ // f64 slot, which sits at offset 4 on big-endian PowerPC.
+ if (Op.getValueType() == MVT::i32)
+ FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
+ DAG.getConstant(4, FIPtr.getValueType()));
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0);
+}
+
+SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Don't handle ppc_fp128 here; let it be lowered to a libcall.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDValue();
+
+ if (Op.getOperand(0).getValueType() == MVT::i64) {
+ SDValue Bits = DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::f64, Op.getOperand(0));
+ SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+ // Since we only generate this in 64-bit mode, we can take advantage of
+ // 64-bit registers. In particular, sign extend the input value into a
+ // 64-bit register with extsw, store the whole 64-bit value onto the stack,
+ // then lfd and fcfid it.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8);
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
+ Op.getOperand(0));
+
+ // STD the extended value into the stack slot.
+ MachineMemOperand MO(PseudoSourceValue::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore, 0, 8, 8);
+ SDValue Store = DAG.getNode(PPCISD::STD_32, dl, MVT::Other,
+ DAG.getEntryNode(), Ext64, FIdx,
+ DAG.getMemOperand(MO));
+ // Load the value as a double.
+ SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0);
+
+ // FCFID it and return it.
+ SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+}
+
+SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ /*
+ The rounding mode is in bits 30:31 of FPSCR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to 0
+ 10 Round to +inf
+ 11 Round to -inf
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
+ */
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MVT VT = Op.getValueType();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ std::vector<MVT> NodeTys;
+ SDValue MFFSreg, InFlag;
+
+ // Save FP Control Word to register
+ NodeTys.push_back(MVT::f64); // return register
+ NodeTys.push_back(MVT::Flag); // unused in this context
+ SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+
+ // Save FP register to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
+ StackSlot, NULL, 0);
+
+ // Load FP Control Word from low 32 bits of stack slot.
+ SDValue Four = DAG.getConstant(4, PtrVT);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
+ SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0);
+
+ // Transform as necessary
+ SDValue CWD1 =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::XOR, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+}
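As a quick cross-check of the conversion formula in the comment above, here is
a minimal standalone sketch (illustrative only, not part of the patch) that
applies ((FPSCR & 3) ^ ((~FPSCR & 3) >> 1)) to each of the four rounding-mode
encodings and asserts the FLT_ROUNDS value listed there.

    #include <cassert>

    // Maps the two FPSCR rounding-mode bits to the FLT_ROUNDS encoding,
    // mirroring the CWD1/CWD2/XOR node sequence built above.
    static int fltRoundsFromFPSCR(unsigned RN) {
      return (RN & 3) ^ ((~RN & 3) >> 1);
    }

    int main() {
      assert(fltRoundsFromFPSCR(0) == 1); // 00: round to nearest
      assert(fltRoundsFromFPSCR(1) == 0); // 01: round to zero
      assert(fltRoundsFromFPSCR(2) == 2); // 10: round to +inf
      assert(fltRoundsFromFPSCR(3) == 3); // 11: round to -inf
      return 0;
    }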
+
+SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SHL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ MVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
+ SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
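The three-shift/OR expansion above relies on the PPC behavior for oversized
shift amounts: slw/srw use only the low six bits of the amount and produce
zero when that value is 32..63. A minimal scalar model of the same sequence
(the helper names are hypothetical, not part of the patch) shows why it is
correct for any amount in 0..63:

    #include <cstdint>

    // Hypothetical helpers mirroring slw/srw: only the low 6 bits of the
    // amount are used, and amounts of 32..63 yield zero.
    static uint32_t ppcShl(uint32_t X, uint32_t Amt) {
      Amt &= 63;
      return Amt < 32 ? X << Amt : 0;
    }
    static uint32_t ppcSrl(uint32_t X, uint32_t Amt) {
      Amt &= 63;
      return Amt < 32 ? X >> Amt : 0;
    }

    // Scalar model of the SHL_PARTS lowering: shift the 64-bit value (Hi,Lo)
    // left by Amt using only 32-bit operations, following the Tmp names above.
    static void shlParts(uint32_t Lo, uint32_t Hi, uint32_t Amt,
                         uint32_t &OutLo, uint32_t &OutHi) {
      // Covers Amt in 0..32: 32 - Amt wraps for Amt > 32, so ppcSrl then
      // contributes zero.
      uint32_t Tmp4 = ppcShl(Hi, Amt) | ppcSrl(Lo, 32 - Amt);
      // Covers Amt in 32..63: Amt - 32 wraps for Amt < 32, so ppcShl then
      // contributes zero.
      uint32_t Tmp6 = ppcShl(Lo, Amt - 32);
      OutHi = Tmp4 | Tmp6;
      OutLo = ppcShl(Lo, Amt);
    }

The SRL_PARTS expansion below is the mirror image. SRA_PARTS needs the final
select_cc on Amt - 32 instead of an OR because sraw/srad fill with copies of
the sign bit (not zero) for amounts of 32..63, so the wrapped-amount shift
would OR in an all-ones word whenever Hi is negative.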
+
+SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ MVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
+ SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRA!");
+
+ // Expand into a bunch of logical ops, followed by a select_cc.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ MVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
+ SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
+ SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
+ Tmp4, Tmp6, ISD::SETLE);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, 2, dl);
+}
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering.
+//
+
+/// BuildSplatI - Build a canonical splati of Val with an element size of
+/// SplatSize. Cast the result to VT.
+static SDValue BuildSplatI(int Val, unsigned SplatSize, MVT VT,
+ SelectionDAG &DAG, DebugLoc dl) {
+ assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
+
+ static const MVT VTys[] = { // canonical VT to use for each size.
+ MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
+ };
+
+ MVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
+
+ // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
+ if (Val == -1)
+ SplatSize = 1;
+
+ MVT CanonicalVT = VTys[SplatSize-1];
+
+ // Build a canonical splat for this value.
+ SDValue Elt = DAG.getConstant(Val, MVT::i32);
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
+ &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BIT_CONVERT, dl, ReqVT, Res);
+}
+
+/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, DebugLoc dl,
+ MVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = LHS.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), LHS, RHS);
+}
+
+/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
+ SDValue Op2, SelectionDAG &DAG,
+ DebugLoc dl, MVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op0.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+}
+
+
+/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
+/// amount. The result has the specified value type.
+static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
+ MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ // Force LHS/RHS to be the right type.
+ LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
+ RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);
+
+ int Ops[16];
+ for (unsigned i = 0; i != 16; ++i)
+ Ops[i] = i + Amt;
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it. If we CAN select this case, and if it
+// selects to a single instruction, return Op. Otherwise, if we can codegen
+// this case more efficiently than a constant pool load, lower it to the
+// sequence of ops that should be used.
+SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+
+ // Check if this is a splat of a constant value.
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs) || SplatBitSize > 32)
+ return SDValue();
+
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ unsigned SplatUndef = APSplatUndef.getZExtValue();
+ unsigned SplatSize = SplatBitSize / 8;
+
+ // First, handle single instruction cases.
+
+ // All zeros?
+ if (SplatBits == 0) {
+ // Canonicalize all zero vectors to be v4i32.
+ if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+ SDValue Z = DAG.getConstant(0, MVT::i32);
+ Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
+ Op = DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Z);
+ }
+ return Op;
+ }
+
+ // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
+ int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
+ (32-SplatBitSize));
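// (Illustrative example: SplatBits = 0xFFF0 with SplatBitSize = 16 becomes
//  0xFFF00000 after the left shift and -16 (0xFFFFFFF0) after the arithmetic
//  shift right, so it falls in the [-16,15] single-instruction range below.)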
+ if (SextVal >= -16 && SextVal <= 15)
+ return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
+
+
+ // Two instruction sequences.
+
+ // If this value is in the range [-32,30] and is even, use:
+ // tmp = VSPLTI[bhw], result = add tmp, tmp
+ if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
+ SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
+ Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
+
+ // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
+ // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
+ // for fneg/fabs.
+ if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
+ // Make -1 and vspltisw -1:
+ SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
+
+ // Make the VSLW intrinsic, computing 0x8000_0000.
+ SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
+ OnesV, DAG, dl);
+
+ // xor by OnesV to invert it.
+ Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
+
+ // Check to see if this is a wide variety of vsplti*, binop self cases.
+ static const signed char SplatCsts[] = {
+ -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
+ -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
+ };
+
+ for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
+ // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
+ // cases which are ambiguous (e.g. formation of 0x8000_0000).
+ int i = SplatCsts[idx];
+
+ // Figure out what shift amount will be used by altivec if shifted by i in
+ // this splat size.
+ unsigned TypeShiftAmt = i & (SplatBitSize-1);
+
+ // vsplti + shl self.
+ if (SextVal == (i << (int)TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
+ Intrinsic::ppc_altivec_vslw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + srl self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
+ Intrinsic::ppc_altivec_vsrw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + sra self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
+ Intrinsic::ppc_altivec_vsraw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + rol self.
+ if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
+ ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
+ Intrinsic::ppc_altivec_vrlw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
+ }
+
+ // t = vsplti c, result = vsldoi t, t, 1
+ if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 2
+ if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 3
+ if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
+ }
+ }
+
+ // Three instruction sequences.
+
+ // Odd, in range [17,31]: (vsplti C)-(vsplti -16).
+ if (SextVal >= 0 && SextVal <= 31) {
+ SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
+ SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
+ LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
+ }
+ // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
+ if (SextVal >= -31 && SextVal <= 0) {
+ SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
+ SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
+ LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
+ }
+
+ return SDValue();
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ DebugLoc dl) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VMRGHW,
+ OP_VMRGLW,
+ OP_VSPLTISW0,
+ OP_VSPLTISW1,
+ OP_VSPLTISW2,
+ OP_VSPLTISW3,
+ OP_VSLDOI4,
+ OP_VSLDOI8,
+ OP_VSLDOI12
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+
+ int ShufIdxs[16];
+ switch (OpNum) {
+ default: assert(0 && "Unknown i32 permute!");
+ case OP_VMRGHW:
+ ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
+ ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
+ ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
+ ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
+ break;
+ case OP_VMRGLW:
+ ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
+ ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
+ ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
+ ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
+ break;
+ case OP_VSPLTISW0:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+0;
+ break;
+ case OP_VSPLTISW1:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+4;
+ break;
+ case OP_VSPLTISW2:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+8;
+ break;
+ case OP_VSPLTISW3:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+12;
+ break;
+ case OP_VSLDOI4:
+ return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI8:
+ return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI12:
+ return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
+ }
+ MVT VT = OpLHS.getValueType();
+ OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS);
+ OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS);
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
+}
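For reference, a PFEntry packs four fields that are consumed here and in
LowerVECTOR_SHUFFLE below. The layout is inferred from the masks used in this
file; the decoder below is a hypothetical helper, not part of the patch.

    #include <cstdint>

    // Fields of a perfect-shuffle table entry, per the masks used above.
    struct PFEntryFields {
      unsigned Cost;   // bits 31:30 - number of instructions needed
      unsigned OpNum;  // bits 29:26 - OP_COPY, OP_VMRGHW, ...
      unsigned LHSID;  // bits 25:13 - base-9 encoding of four element indices
      unsigned RHSID;  // bits 12:0  - base-9 encoding of four element indices
    };

    static PFEntryFields decodePFEntry(uint32_t PFEntry) {
      PFEntryFields F;
      F.Cost  = PFEntry >> 30;
      F.OpNum = (PFEntry >> 26) & 0x0F;
      F.LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
      F.RHSID = (PFEntry >>  0) & ((1 << 13) - 1);
      return F;
    }

In the base-9 encoding, digits 0..3 select elements of the LHS input, 4..7
select elements of the RHS, and 8 means undef. The OP_COPY check above
compares LHSID against (1*9+2)*9+3, i.e. digits <0,1,2,3>: copy elements 0..3
of the LHS; the RHS copy case is digits <4,5,6,7>.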
+
+/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
+/// is a shuffle we can handle in a single instruction, return it. Otherwise,
+/// return the code it can be lowered into. Worst case, it can always be
+/// lowered into a vperm.
+SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ MVT VT = Op.getValueType();
+
+ // Cases that are handled by instructions that take permute immediates
+ // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
+ // selected by the instruction selector.
+ if (V2.getOpcode() == ISD::UNDEF) {
+ if (PPC::isSplatShuffleMask(SVOp, 1) ||
+ PPC::isSplatShuffleMask(SVOp, 2) ||
+ PPC::isSplatShuffleMask(SVOp, 4) ||
+ PPC::isVPKUWUMShuffleMask(SVOp, true) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, true) ||
+ PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
+ return Op;
+ }
+ }
+
+ // Altivec has a variety of "shuffle immediates" that take two vector inputs
+ // and produce a fixed permutation. If any of these match, do not lower to
+ // VPERM.
+ if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, false) ||
+ PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, false))
+ return Op;
+
+ // Check to see if this is a shuffle of 4-byte values. If so, we can use our
+ // perfect shuffle table to emit an optimal matching sequence.
+ SmallVector<int, 16> PermMask;
+ SVOp->getMask(PermMask);
+
+ unsigned PFIndexes[4];
+ bool isFourElementShuffle = true;
+ for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
+ unsigned EltNo = 8; // Start out undef.
+ for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
+ if (PermMask[i*4+j] < 0)
+ continue; // Undef, ignore it.
+
+ unsigned ByteSource = PermMask[i*4+j];
+ if ((ByteSource & 3) != j) {
+ isFourElementShuffle = false;
+ break;
+ }
+
+ if (EltNo == 8) {
+ EltNo = ByteSource/4;
+ } else if (EltNo != ByteSource/4) {
+ isFourElementShuffle = false;
+ break;
+ }
+ }
+ PFIndexes[i] = EltNo;
+ }
+
+ // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
+ // perfect shuffle vector to determine if it is cost effective to do this as
+ // discrete instructions, or whether we should use a vperm.
+ if (isFourElementShuffle) {
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ // Determining when to avoid vperm is tricky. Many things affect the cost
+ // of vperm, particularly how many times the perm mask needs to be computed.
+ // For example, if the perm mask can be hoisted out of a loop or is already
+ // used (perhaps because there are multiple permutes with the same shuffle
+ // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
+ // the loop requires an extra register.
+ //
+ // As a compromise, we only emit discrete instructions if the shuffle can be
+ // generated in 3 or fewer operations. When we have loop information
+ // available, if this block is within a loop, we should avoid using vperm
+ // for 3-operation perms and use a constant pool load instead.
+ if (Cost < 3)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
+ // vector that will get spilled to the constant pool.
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
+ // that it is in input element units, not in bytes. Convert now.
+ MVT EltVT = V1.getValueType().getVectorElementType();
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
+
+ for (unsigned j = 0; j != BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i32));
+ }
+
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
+}
+
+/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
+/// altivec comparison. If it is, return true and fill in Opc/isDot with
+/// information about the intrinsic.
+static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
+ bool &isDot) {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+ CompareOpc = -1;
+ isDot = false;
+ switch (IntrinsicID) {
+ default: return false;
+ // Comparison predicates.
+ case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+
+ // Normal Comparisons.
+ case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ }
+ return true;
+}
+
+/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
+/// lower, do it, otherwise return null.
+SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) {
+ // If this is a lowered altivec predicate compare, CompareOpc is set to the
+ // opcode number of the comparison.
+ DebugLoc dl = Op.getDebugLoc();
+ int CompareOpc;
+ bool isDot;
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ return SDValue(); // Don't custom lower most intrinsics.
+
+ // If this is a non-dot comparison, make the VCMP node and we are done.
+ if (!isDot) {
+ SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(CompareOpc, MVT::i32));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Tmp);
+ }
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ SDValue Ops[] = {
+ Op.getOperand(2), // LHS
+ Op.getOperand(3), // RHS
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ std::vector<MVT> VTs;
+ VTs.push_back(Op.getOperand(2).getValueType());
+ VTs.push_back(MVT::Flag);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+
+ // Now that we have the comparison, emit a copy from the CR to a GPR.
+ // This is flagged to the above dot comparison.
+ SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ CompNode.getValue(1));
+
+ // Unpack the result based on how the target uses it.
+ unsigned BitNo; // Bit # of CR6.
+ bool InvertBit; // Invert result?
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Return the value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = false;
+ break;
+ case 1: // Return the inverted value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = true;
+ break;
+ case 2: // Return the value of the LT bit of CR6.
+ BitNo = 2; InvertBit = false;
+ break;
+ case 3: // Return the inverted value of the LT bit of CR6.
+ BitNo = 2; InvertBit = true;
+ break;
+ }
+
+ // Shift the bit into the low position.
+ Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
+ DAG.getConstant(8-(3-BitNo), MVT::i32));
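// Assuming the usual layout of the MFCR result (CR0 in the most significant
// four bits, so the CR6 field occupies bits 7..4), CR6's LT bit is bit 7 and
// its EQ bit is bit 5; 8-(3-BitNo) is therefore 7 for LT (BitNo==2) and 5 for
// EQ (BitNo==0), which moves the selected bit down to bit 0.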
+ // Isolate the bit.
+ Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+
+ // If we are supposed to, toggle the bit.
+ if (InvertBit)
+ Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+ return Flags;
+}
+
+SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Create a stack slot that is 16-byte aligned.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16);
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ // Store the input value into Value#0 of the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Op.getOperand(0), FIdx, NULL, 0);
+ // Load it out.
+ return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0);
+}
+
+SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ if (Op.getValueType() == MVT::v4i32) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
+ SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl); // +16 as shift amt (vrlw/vslw use only the low 5 bits).
+
+ SDValue RHSSwap = // = vrlw RHS, 16
+ BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
+
+ // Shrinkify inputs to v8i16.
+ LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, LHS);
+ RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHS);
+ RHSSwap = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHSSwap);
+
+ // Low parts multiplied together, generating 32-bit results (we ignore the
+ // top parts).
+ SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
+ LHS, RHS, DAG, dl, MVT::v4i32);
+
+ SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
+ LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
+ // Shift the high parts up 16 bits.
+ HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
+ Neg16, DAG, dl);
+ return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
+ } else if (Op.getValueType() == MVT::v8i16) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
+
+ return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
+ LHS, RHS, Zero, DAG, dl);
+ } else if (Op.getValueType() == MVT::v16i8) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ // Multiply the even 8-bit parts, producing 16-bit sums.
+ SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ EvenParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, EvenParts);
+
+ // Multiply the odd 8-bit parts, producing 16-bit sums.
+ SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts);
+
+ // Merge the results together.
+ int Ops[16];
+ for (unsigned i = 0; i != 8; ++i) {
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
+ }
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
+ } else {
+ assert(0 && "Unknown mul to lower!");
+ abort();
+ }
+}
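The v4i32 branch above is the schoolbook decomposition of a 32x32->32 multiply
into 16-bit halves. A scalar model of what the vmulouh / vmsumuhm / vslw / add
sequence computes per element (illustrative only, not part of the patch):

    #include <cstdint>

    // With a = aH*2^16 + aL and b = bH*2^16 + bL,
    //   a*b mod 2^32 = aL*bL + ((aH*bL + aL*bH) << 16)   (mod 2^32).
    // vmulouh produces the aL*bL terms, vmsumuhm with the halfword-rotated
    // RHS produces aH*bL + aL*bH, and vslw by 16 lines the high part up for
    // the final add.
    static uint32_t mulViaHalves(uint32_t A, uint32_t B) {
      uint32_t AL = A & 0xFFFF, AH = A >> 16;
      uint32_t BL = B & 0xFFFF, BH = B >> 16;
      uint32_t LoProd = AL * BL;            // vmulouh
      uint32_t HiProd = AH * BL + AL * BH;  // vmsumuhm(LHS, vrlw(RHS, 16), 0)
      return LoProd + (HiProd << 16);       // vslw by 16, then the final add
    }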
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Wasn't expecting to be able to lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
+ VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
+
+ case ISD::VAARG:
+ return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
+ VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
+
+ case ISD::FORMAL_ARGUMENTS:
+ return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex,
+ VarArgsStackOffset, VarArgsNumGPR,
+ VarArgsNumFPR, PPCSubTarget);
+
+ case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget,
+ getTargetMachine());
+ case ISD::RET: return LowerRET(Op, DAG, getTargetMachine());
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
+ case ISD::DYNAMIC_STACKALLOC:
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG,
+ Op.getDebugLoc());
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+
+ // Lower 64-bit shifts.
+ case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
+ case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
+ case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+
+ // Frame & Return address.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ }
+ return SDValue();
+}
+
+void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ assert(false && "Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::FP_ROUND_INREG: {
+ assert(N->getValueType(0) == MVT::ppcf128);
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(1));
+
+ // This sequence changes FPSCR to do round-to-zero, adds the two halves
+ // of the long double, and puts FPSCR back the way it was. We do not
+ // actually model FPSCR.
+ std::vector<MVT> NodeTys;
+ SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
+
+ NodeTys.push_back(MVT::f64); // Return register
+ NodeTys.push_back(MVT::Flag); // Returns a flag for later insns
+ Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+ MFFSreg = Result.getValue(0);
+ InFlag = Result.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Flag); // Returns a flag
+ Ops[0] = DAG.getConstant(31, MVT::i32);
+ Ops[1] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
+ InFlag = Result.getValue(0);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Flag); // Returns a flag
+ Ops[0] = DAG.getConstant(30, MVT::i32);
+ Ops[1] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
+ InFlag = Result.getValue(0);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::f64); // result of add
+ NodeTys.push_back(MVT::Flag); // Returns a flag
+ Ops[0] = Lo;
+ Ops[1] = Hi;
+ Ops[2] = InFlag;
+ Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
+ FPreg = Result.getValue(0);
+ InFlag = Result.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::f64);
+ Ops[0] = DAG.getConstant(1, MVT::i32);
+ Ops[1] = MFFSreg;
+ Ops[2] = FPreg;
+ Ops[3] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
+ FPreg = Result.getValue(0);
+
+ // We know the low half is about to be thrown away, so just use something
+ // convenient.
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
+ FPreg, FPreg));
+ return;
+ }
+ case ISD::FP_TO_SINT:
+ Results.push_back(LowerFP_TO_SINT(SDValue(N, 0), DAG, dl));
+ return;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ bool is64bit, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned TmpReg = (!BinOpcode) ? incr :
+ RegInfo.createVirtualRegister(
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // l[wd]arx dest, ptr
+ // add r0, dest, incr
+ // st[wd]cx. r0, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ return BB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ bool is8bit, // operation
+ unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ // In 64-bit mode we have to use 64-bit registers for addresses, even though
+ // lwarx/stwcx. only operate on 32-bit data. With the word-sized atomics we
+ // can use the address registers without caring whether they are 32 or 64
+ // bits, but here we do actual arithmetic on the addresses.
+ bool is64bit = PPCSubTarget.isPPC64();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw incr2, incr, shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // loopMBB:
+ // lwarx tmpDest, ptr
+ // add tmp, tmpDest, incr2
+ // andc tmp2, tmpDest, mask
+ // and tmp3, tmp, mask
+ // or tmp4, tmp3, tmp2
+ // stwcx. tmp4, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ // srw dest, tmpDest, shift
+
+ if (ptrA!=PPC::R0) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
+ .addReg(incr).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(PPC::R0).addReg(PtrReg);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
+ .addReg(Incr2Reg).addReg(TmpDestReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
+ .addReg(TmpReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
+ .addReg(Tmp3Reg).addReg(Tmp2Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
+ .addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg);
+ return BB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // To "insert" these instructions we actually have to insert their
+ // control-flow patterns.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ MachineFunction *F = BB->getParent();
+
+ if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+
+ // The incoming instruction knows the destination vreg to set, the
+ // condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ sinkMBB->transferSuccessors(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII->get(PPC::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ }
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
+ BB = EmitAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
+ BB = EmitAtomicBinary(MI, BB, true, 0);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // l[wd]arx dest, ptr
+ // cmp[wd] dest, oldval
+ // bne- midMBB
+ // loop2MBB:
+ // st[wd]cx. newval, ptr
+ // bne- loop1MBB
+ // b exitBB
+ // midMBB:
+ // st[wd]cx. dest, ptr
+ // exitBB:
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
+ .addReg(oldval).addReg(dest);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(newval).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(dest).addReg(ptrA).addReg(ptrB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
+ // We must use 64-bit registers for addresses when targeting 64-bit,
+ // since we're actually doing arithmetic on them. Other registers
+ // can be 32-bit.
+ bool is64bit = PPCSubTarget.isPPC64();
+ bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw newval2, newval, shift
+ // slw oldval2, oldval,shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // and newval3, newval2, mask
+ // and oldval3, oldval2, mask
+ // loop1MBB:
+ // lwarx tmpDest, ptr
+ // and tmp, tmpDest, mask
+ // cmpw tmp, oldval3
+ // bne- midMBB
+ // loop2MBB:
+ // andc tmp2, tmpDest, mask
+ // or tmp4, tmp2, newval3
+ // stwcx. tmp4, ptr
+ // bne- loop1MBB
+ // b exitBB
+ // midMBB:
+ // stwcx. tmpDest, ptr
+ // exitBB:
+ // srw dest, tmpDest, shift
+ if (ptrA!=PPC::R0) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
+ .addReg(newval).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
+ .addReg(oldval).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
+ .addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
+ .addReg(NewVal2Reg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
+ .addReg(OldVal2Reg).addReg(MaskReg);
+
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(PPC::R0).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
+ .addReg(TmpReg).addReg(OldVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
+ .addReg(Tmp2Reg).addReg(NewVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
+ .addReg(PPC::R0).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
+ .addReg(PPC::R0).addReg(PtrReg);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(BB, dl, TII->get(PPC::SRW),dest).addReg(TmpReg).addReg(ShiftReg);
+ } else {
+ assert(0 && "Unexpected instr type to insert");
+ }
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
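
A minimal C sketch of the byte-wide compare-and-swap that the pseudo-asm comment above describes, assuming a big-endian 32-bit word; cmpxchg_u8 is an illustrative name, and the GCC __sync builtin stands in for the lwarx/stwcx. reservation loop that the custom inserter actually emits.

#include <stdint.h>

static uint8_t cmpxchg_u8(uint8_t *p, uint8_t oldval, uint8_t newval) {
  uint32_t *wp   = (uint32_t *)((uintptr_t)p & ~(uintptr_t)3); /* rlwinm ptr        */
  unsigned shift = (((uintptr_t)p & 3) << 3) ^ 24;             /* shift1, then xori */
  uint32_t mask  = 0xFFu << shift;                             /* li mask2; slw     */
  uint32_t oldw  = (uint32_t)oldval << shift;                  /* slw oldval2/and   */
  uint32_t neww  = (uint32_t)newval << shift;                  /* slw newval2/and   */
  for (;;) {
    uint32_t cur = *wp;                                        /* lwarx tmpDest     */
    if ((cur & mask) != oldw)                                  /* cmpw; bne- midMBB */
      return (uint8_t)((cur & mask) >> shift);                 /* srw dest          */
    uint32_t updated = (cur & ~mask) | neww;                   /* andc; or tmp4     */
    if (__sync_bool_compare_and_swap(wp, cur, updated))        /* stwcx.; bne-      */
      return oldval;                                           /* exitBB: srw dest  */
  }
}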
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ TargetMachine &TM = getTargetMachine();
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default: break;
+ case PPCISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->getZExtValue() == 0) // 0 << V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->getZExtValue() == 0) // 0 >>u V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRA:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->getZExtValue() == 0 || // 0 >>s V -> 0.
+ C->isAllOnesValue()) // -1 >>s V -> -1.
+ return N->getOperand(0);
+ }
+ break;
+
+ case ISD::SINT_TO_FP:
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
+ // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
+ // We allow the src/dst to be either f32/f64, but the intermediate
+ // type must be i64.
+ if (N->getOperand(0).getValueType() == MVT::i64 &&
+ N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+
+ Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ if (N->getValueType(0) == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
+ DAG.getIntPtrConstant(0));
+ DCI.AddToWorklist(Val.getNode());
+ }
+ return Val;
+ } else if (N->getOperand(0).getValueType() == MVT::i32) {
+ // If the intermediate type is i32, we can avoid the load/store here
+ // too.
+ }
+ }
+ }
+ break;
+ case ISD::STORE:
+ // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
+ if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
+ !cast<StoreSDNode>(N)->isTruncatingStore() &&
+ N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
+ N->getOperand(1).getValueType() == MVT::i32 &&
+ N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(1).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+ Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+
+ Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val,
+ N->getOperand(2), N->getOperand(3));
+ DCI.AddToWorklist(Val.getNode());
+ return Val;
+ }
+
+ // Turn STORE (BSWAP) -> sthbrx/stwbrx.
+ if (N->getOperand(1).getOpcode() == ISD::BSWAP &&
+ N->getOperand(1).getNode()->hasOneUse() &&
+ (N->getOperand(1).getValueType() == MVT::i32 ||
+ N->getOperand(1).getValueType() == MVT::i16)) {
+ SDValue BSwapOp = N->getOperand(1).getOperand(0);
+ // Do an any-extend to 32-bits if this is a half-word input.
+ if (BSwapOp.getValueType() == MVT::i16)
+ BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
+
+ return DAG.getNode(PPCISD::STBRX, dl, MVT::Other, N->getOperand(0),
+ BSwapOp, N->getOperand(2), N->getOperand(3),
+ DAG.getValueType(N->getOperand(1).getValueType()));
+ }
+ break;
+ case ISD::BSWAP:
+ // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
+ if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+ N->getOperand(0).hasOneUse() &&
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
+ SDValue Load = N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(Load);
+ // Create the byte-swapping load.
+ std::vector<MVT> VTs;
+ VTs.push_back(MVT::i32);
+ VTs.push_back(MVT::Other);
+ SDValue MO = DAG.getMemOperand(LD->getMemOperand());
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ MO, // MemOperand
+ DAG.getValueType(N->getValueType(0)) // VT
+ };
+ SDValue BSLoad = DAG.getNode(PPCISD::LBRX, dl, VTs, Ops, 4);
+
+ // If this is an i16 load, insert the truncate.
+ SDValue ResVal = BSLoad;
+ if (N->getValueType(0) == MVT::i16)
+ ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
+
+ // First, combine the bswap away. This makes the value produced by the
+ // load dead.
+ DCI.CombineTo(N, ResVal);
+
+ // Next, combine the load away; we give it a bogus result value but a real
+ // chain result. The result value is dead because the bswap is dead.
+ DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+
+ break;
+ case PPCISD::VCMP: {
+ // If a VCMPo node already exists with exactly the same operands as this
+ // node, use its result instead of this node (VCMPo computes both a CR6 and
+ // a normal output).
+ //
+ if (!N->getOperand(0).hasOneUse() &&
+ !N->getOperand(1).hasOneUse() &&
+ !N->getOperand(2).hasOneUse()) {
+
+ // Scan all of the users of the LHS, looking for VCMPo's that match.
+ SDNode *VCMPoNode = 0;
+
+ SDNode *LHSN = N->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
+ UI != E; ++UI)
+ if (UI->getOpcode() == PPCISD::VCMPo &&
+ UI->getOperand(1) == N->getOperand(1) &&
+ UI->getOperand(2) == N->getOperand(2) &&
+ UI->getOperand(0) == N->getOperand(0)) {
+ VCMPoNode = *UI;
+ break;
+ }
+
+ // If there is no VCMPo node, or if the flag value has a single use, don't
+ // transform this.
+ if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
+ break;
+
+ // Look at the (necessarily single) use of the flag value. If it has a
+ // chain, this transformation is more complex. Note that multiple things
+ // could use the value result, which we should ignore.
+ SDNode *FlagUser = 0;
+ for (SDNode::use_iterator UI = VCMPoNode->use_begin();
+ FlagUser == 0; ++UI) {
+ assert(UI != VCMPoNode->use_end() && "Didn't find user!");
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
+ FlagUser = User;
+ break;
+ }
+ }
+ }
+
+ // If the user is an MFCR instruction, we know this is safe. Otherwise we
+ // give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFCR)
+ return SDValue(VCMPoNode, 0);
+ }
+ break;
+ }
+ case ISD::BR_CC: {
+ // If this is a branch on an altivec predicate comparison, lower this so
+ // that we don't have to do an MFCR: instead, branch directly on CR6. This
+ // lowering is done pre-legalize, because the legalizer lowers the predicate
+ // compare down to code that is difficult to reassemble.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
+ int CompareOpc;
+ bool isDot;
+
+ if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ assert(isDot && "Can't compare against a vector result!");
+
+ // If this is a comparison against something other than 0/1, then we know
+ // that the condition is never/always true.
+ unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (Val != 0 && Val != 1) {
+ if (CC == ISD::SETEQ) // Cond never true, remove branch.
+ return N->getOperand(0);
+ // Always !=, turn it into an unconditional branch.
+ return DAG.getNode(ISD::BR, dl, MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ }
+
+ bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ std::vector<MVT> VTs;
+ SDValue Ops[] = {
+ LHS.getOperand(2), // LHS of compare
+ LHS.getOperand(3), // RHS of compare
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ VTs.push_back(LHS.getOperand(2).getValueType());
+ VTs.push_back(MVT::Flag);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+
+ // Unpack the result based on how the target uses it.
+ PPC::Predicate CompOpc;
+ switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Branch on the value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
+ break;
+ case 1: // Branch on the inverted value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
+ break;
+ case 2: // Branch on the value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
+ break;
+ case 3: // Branch on the inverted value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
+ break;
+ }
+
+ return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
+ DAG.getConstant(CompOpc, MVT::i32),
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ N->getOperand(4), CompNode.getValue(1));
+ }
+ break;
+ }
+ }
+
+ return SDValue();
+}
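
Two of the combines above, illustrated with the C that typically produces them (function names are made up; the exact selection depends on the subtarget flags checked above). The first becomes fctidz + fcfid with no integer store/reload on a subtarget with 64-bit support; the second becomes a single byte-swapping stwbrx.

double round_toward_zero(double x) {
  return (double)(long long)x;   /* (sint_to_fp (fp_to_sint x)) with an i64 middle */
}

void store_swapped(unsigned *p, unsigned v) {
  *p = __builtin_bswap32(v);     /* STORE (BSWAP x) of an i32 value */
}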
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case PPCISD::LBRX: {
+ // lhbrx is known to have the top bits cleared out.
+ if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16)
+ KnownZero = 0xFFFF0000;
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
+ default: break;
+ case Intrinsic::ppc_altivec_vcmpbfp_p:
+ case Intrinsic::ppc_altivec_vcmpeqfp_p:
+ case Intrinsic::ppc_altivec_vcmpequb_p:
+ case Intrinsic::ppc_altivec_vcmpequh_p:
+ case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpgefp_p:
+ case Intrinsic::ppc_altivec_vcmpgtfp_p:
+ case Intrinsic::ppc_altivec_vcmpgtsb_p:
+ case Intrinsic::ppc_altivec_vcmpgtsh_p:
+ case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtub_p:
+ case Intrinsic::ppc_altivec_vcmpgtuh_p:
+ case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ KnownZero = ~1U; // All bits but the low one are known to be zero.
+ break;
+ }
+ }
+ }
+}
+
+
+/// getConstraintType - Given a constraint, return the type of
+/// constraint it is for this target.
+PPCTargetLowering::ConstraintType
+PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC RS6000 Constraint Letters
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ case 'r': // R0-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, PPC::G8RCRegisterClass);
+ return std::make_pair(0U, PPC::GPRCRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, PPC::F4RCRegisterClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, PPC::F8RCRegisterClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, PPC::VRRCRegisterClass);
+ case 'y': // crrc
+ return std::make_pair(0U, PPC::CRRCRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
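
Illustrative inline asm (not from this patch) exercising the register-class letters mapped above: 'b' keeps an operand out of R0, 'r' allows any GPR, and 'f' selects an FPR ('v' would similarly pick an Altivec register, 'y' a CR field).

unsigned gpr_add(unsigned a, unsigned b) {
  unsigned r;
  __asm__("add %0, %1, %2" : "=r"(r) : "b"(a), "r"(b));
  return r;
}

double fpr_add(double a, double b) {
  double d;
  __asm__("fadd %0, %1, %2" : "=f"(d) : "f"(a), "f"(b));
  return d;
}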
+
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops. If hasMemory is true
+/// it means one of the asm constraint of the inline asm instruction being
+/// processed is 'm'.
+void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
+ bool hasMemory,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0,0);
+ switch (Letter) {
+ default: break;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P': {
+ ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
+ if (!CST) return; // Must be an immediate to match.
+ unsigned Value = CST->getZExtValue();
+ switch (Letter) {
+ default: assert(0 && "Unknown constraint letter!");
+ case 'I': // "I" is a signed 16-bit constant.
+ if ((short)Value == (int)Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
+ case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
+ if ((short)Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
+ if ((Value >> 16) == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'M': // "M" is a constant that is greater than 31.
+ if (Value > 31)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'N': // "N" is a positive constant that is an exact power of two.
+ if ((int)Value > 0 && isPowerOf2_32(Value))
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'O': // "O" is the constant zero.
+ if (Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
+ if ((short)-Value == (int)-Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ }
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ // Handle standard constraint letters.
+ TargetLowering::LowerAsmOperandForConstraint(Op, Letter, hasMemory, Ops, DAG);
+}
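
A hypothetical use of two of the immediate letters validated above, with arbitrary constants: 'I' must be a signed 16-bit value (here it feeds addi), and 'K' must have only its low-order 16 bits nonzero (here it feeds ori).

unsigned imm_demo(unsigned x) {
  unsigned t, u;
  __asm__("addi %0, %1, %2" : "=r"(t) : "b"(x), "I"(-1234));
  __asm__("ori  %0, %1, %2" : "=r"(u) : "r"(t), "K"(0xBEEF));
  return u;
}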
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // FIXME: PPC does not allow r+i addressing modes for vectors!
+
+ // PPC allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // PPC only supports r+r,
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default:
+ // No other scales are supported.
+ return false;
+ }
+
+ return true;
+}
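
For concreteness (values chosen only for illustration): a base register plus a small offset (HasBaseReg, BaseOffs = 32) and a reg+reg form (HasBaseReg, Scale = 1, BaseOffs = 0) are both accepted; reg+reg with an additional offset (Scale = 1 and a nonzero BaseOffs) is rejected; Scale = 2 is accepted only when no base register or offset is present, since 2*r can be selected as r+r; and any other scale fails.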
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode for load / store of the
+/// given type.
+bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
+ // PPC allows a sign-extended 16-bit immediate field.
+ return (V > -(1 << 16) && V < (1 << 16)-1);
+}
+
+bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+ return false;
+}
+
+SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Depths > 0 not supported yet!
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
+ return SDValue();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ // Just load the return address off the stack.
+ SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
+
+ // Make sure the function really does not optimize away the store of the RA
+ // to the stack.
+ FuncInfo->setLRStoreRequired();
+ return DAG.getLoad(getPointerTy(), dl,
+ DAG.getEntryNode(), RetAddrFI, NULL, 0);
+}
+
+SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Depths > 0 not supported yet!
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
+ return SDValue();
+
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects())
+ && MFI->getStackSize();
+
+ if (isPPC64)
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::X31 : PPC::X1,
+ MVT::i64);
+ else
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::R31 : PPC::R1,
+ MVT::i32);
+}
+
+bool
+PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The PowerPC target isn't yet aware of offsets.
+ return false;
+}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
new file mode 100644
index 000000000000..79464749724e
--- /dev/null
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -0,0 +1,394 @@
+//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PPC uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "PPC.h"
+#include "PPCSubtarget.h"
+
+namespace llvm {
+ namespace PPCISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// FSEL - Traditional three-operand fsel node.
+ ///
+ FSEL,
+
+ /// FCFID - The FCFID instruction, taking an f64 operand and producing
+ /// and f64 value containing the FP representation of the integer that
+ /// was temporarily in the f64 operand.
+ FCFID,
+
+ /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+ /// operand, producing an f64 value containing the integer representation
+ /// of that FP value.
+ FCTIDZ, FCTIWZ,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to,
+ /// then a SRCVALUE for the address.
+ STFIWX,
+
+ // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
+ // three v4f32 operands and producing a v4f32 result.
+ VMADDFP, VNMSUBFP,
+
+ /// VPERM - The PPC VPERM Instruction.
+ ///
+ VPERM,
+
+ /// Hi/Lo - These represent the high and low 16-bit parts of a global
+ /// address respectively. These nodes have two operands, the first of
+ /// which must be a TargetGlobalAddress, and the second of which must be a
+ /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
+ /// though these are usually folded into other nodes.
+ Hi, Lo,
+
+ /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an allocation on the stack.
+ DYNALLOC,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
+ /// shift amounts. These nodes are generated by the multi-precision shift
+ /// code.
+ SRL, SRA, SHL,
+
+ /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
+ /// registers.
+ EXTSW_32,
+
+ /// STD_32 - This is the STD instruction for use with "32-bit" registers.
+ STD_32,
+
+ /// CALL - A direct function call.
+ CALL_Macho, CALL_ELF,
+
+ /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
+ /// MTCTR instruction.
+ MTCTR,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
+ /// BCTRL instruction.
+ BCTRL_Macho, BCTRL_ELF,
+
+ /// Return with a flag operand, matched by 'blr'
+ RET_FLAG,
+
+ /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCR/MFOCRF instructions.
+ /// This copies the bits corresponding to the specified CRREG into the
+ /// resultant GPR. Bits corresponding to other CR regs are undefined.
+ MFCR,
+
+ /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
+ /// instructions. For lack of a better number, we use the opcode number
+ /// encoding for the OPC field to identify the compare. For example, 838
+ /// is VCMPGTSH.
+ VCMP,
+
+ /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
+ /// altivec VCMP*o instructions. For lack of a better number, we use the
+ /// opcode number encoding for the OPC field to identify the compare. For
+ /// example, 838 is VCMPGTSH.
+ VCMPo,
+
+ /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
+ /// condition register to branch on, OPC is the branch opcode to use (e.g.
+ /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH,
+
+ /// CHAIN = STBRX CHAIN, GPRC, Ptr, SRCVALUE, Type - This is a
+ /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
+ /// the GPRC input, then stores it through Ptr. Type can be either i16 or
+ /// i32.
+ STBRX,
+
+ /// GPRC, CHAIN = LBRX CHAIN, Ptr, SRCVALUE, Type - This is a
+ /// byte-swapping load instruction. It loads "Type" bits, byte-swaps them,
+ /// then puts them in the bottom bits of the GPRC. Type can be either i16
+ /// or i32.
+ LBRX,
+
+ // The following 5 instructions are used only as part of the
+ // long double-to-int conversion sequence.
+
+ /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the
+ /// register.
+ MFFS,
+
+ /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR.
+ MTFSB0,
+
+ /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR.
+ MTFSB1,
+
+ /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
+ /// rounding towards zero. It has flags added so it won't move past the
+ /// FPSCR-setting instructions.
+ FADDRTZ,
+
+ /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
+ MTFSF,
+
+ /// LARX = This corresponds to PPC l{w|d}arx instruction: load and
+ /// reserve indexed. This is used to implement atomic operations.
+ LARX,
+
+ /// STCX = This corresponds to PPC stcx. instruction: store conditional
+ /// indexed. This is used to implement atomic operations.
+ STCX,
+
+ /// TAILCALL - Indicates a tail call should be taken.
+ TAILCALL,
+ /// TC_RETURN - A tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace PPC {
+ /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUHUM instruction.
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+
+ /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUWUM instruction.
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+
+ /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
+ bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary);
+
+ /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
+ bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ bool isUnary);
+
+ /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+ /// amount, otherwise return -1.
+ int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
+
+ /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of a single element that is suitable for input to
+ /// VSPLTB/VSPLTH/VSPLTW.
+ bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
+
+ /// isAllNegativeZeroVector - Returns true if all elements of build_vector
+ /// are -0.0.
+ bool isAllNegativeZeroVector(SDNode *N);
+
+ /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+ /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+ unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
+
+ /// get_VSPLTI_elt - If this is a build_vector of constants which can be
+ /// formed by using a vspltis[bhw] instruction of the specified element
+ /// size, return the constant being splatted. The ByteSize field indicates
+ /// the number of bytes of each element [124] -> [bhw].
+ SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+ }
+
+ class PPCTargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ int VarArgsStackOffset; // StackOffset for start of stack
+ // arguments.
+ unsigned VarArgsNumGPR; // Index of the first unused integer
+ // register for parameter passing.
+ unsigned VarArgsNumFPR; // Index of the first unused double
+ // register for parameter passing.
+ int ReturnAddrIndex; // FrameIndex for return slot.
+ const PPCSubtarget &PPCSubTarget;
+ public:
+ explicit PPCTargetLowering(PPCTargetMachine &TM);
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual MVT getSetCCResultType(MVT VT) const;
+
+ /// getPreIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if the node's address
+ /// can be legally represented as pre-indexed load / store address.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegReg - Given the specified address, check to see if it
+ /// can be represented as an indexed [r+r] operation. Returns false if it
+ /// can be more efficiently represented with [r+imm].
+ bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImm - Returns true if the address N can be represented
+ /// by a base register plus a signed 16-bit displacement [r+imm], and if it
+ /// is not better represented as reg+reg.
+ bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegRegOnly - Given the specified address, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImmShift - Returns true if the address N can be
+ /// represented by a base register plus a signed 14-bit displacement
+ /// [r+imm*4]. Suitable for use by STD and friends.
+ bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG) const;
+
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG);
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB, bool is64Bit,
+ unsigned BinOpcode) const;
+ MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool is8bit, unsigned Opcode) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area. This is the actual
+ /// alignment, not its logarithm.
+ unsigned getByValTypeAlignment(const Type *Ty) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+ /// true it means one of the asm constraint of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+ /// isLegalAddressImmediate - Return true if the integer value can be used
+ /// as the offset of the target addressing mode for load / store of the
+ /// given type.
+ virtual bool isLegalAddressImmediate(int64_t V, const Type *Ty) const;
+
+ /// isLegalAddressImmediate - Return true if the GlobalValue can be used as
+ /// the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets that want to do tail call
+ /// optimization should implement this function.
+ virtual bool IsEligibleForTailCallOptimization(CallSDNode *TheCall,
+ SDValue Ret,
+ SelectionDAG &DAG) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ private:
+ SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
+ SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
+
+ SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ DebugLoc dl);
+
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ int VarArgsFrameIndex, int VarArgsStackOffset,
+ unsigned VarArgsNumGPR, unsigned VarArgsNumFPR,
+ const PPCSubtarget &Subtarget);
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, int VarArgsFrameIndex,
+ int VarArgsStackOffset, unsigned VarArgsNumGPR,
+ unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget);
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG,
+ int &VarArgsFrameIndex,
+ int &VarArgsStackOffset,
+ unsigned &VarArgsNumGPR,
+ unsigned &VarArgsNumFPR,
+ const PPCSubtarget &Subtarget);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget, TargetMachine &TM);
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM);
+ SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget);
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG);
+ };
+}
+
+#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
new file mode 100644
index 000000000000..417c8ed6e906
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -0,0 +1,723 @@
+//===- PPCInstr64Bit.td - The PowerPC 64-bit Support -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC 64-bit instructions. These patterns are used
+// both when in ppc64 mode and when in "use 64-bit extensions in 32-bit" mode.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands.
+//
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def symbolHi64 : Operand<i64> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo64 : Operand<i64> {
+ let PrintMethod = "printSymbolLo";
+}
+
+//===----------------------------------------------------------------------===//
+// 64-bit transformation functions.
+//
+
+def SHL64 : SDNodeXForm<imm, [{
+ // Transformation function: 63 - imm
+ return getI32Imm(63 - N->getZExtValue());
+}]>;
+
+def SRL64 : SDNodeXForm<imm, [{
+ // Transformation function: 64 - imm
+ return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue()) : getI32Imm(0);
+}]>;
+
+def HI32_48 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 32));
+}]>;
+
+def HI48_64 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 48));
+}]>;
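
Worked example (illustrative value): for the 64-bit immediate 0x123456789ABCDEF0, HI48_64 yields 0x1234 and HI32_48 yields 0x5678, i.e. each transform exposes one 16-bit slice of the constant in the low bits so it can be used as an ordinary 16-bit operand.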
+
+
+//===----------------------------------------------------------------------===//
+// Calls.
+//
+
+let Defs = [LR8] in
+ def MovePCtoLR8 : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>,
+ PPC970_Unit_BRU;
+
+// Macho ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the PPC64 non-callee saved registers.
+ Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR8,CTR8,
+ CR0,CR1,CR5,CR6,CR7] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL8_Macho : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA8_Macho : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_Macho (i64 imm:$func))]>;
+ }
+ let Uses = [CTR8, RM] in {
+ def BCTRL8_Macho : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_Macho)]>, Requires<[In64BitMode]>;
+ }
+}
+
+// ELF 64 ABI Calls = Macho ABI Calls
+// Used to define BL8_ELF and BLA8_ELF
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the PPC64 non-callee saved registers.
+ Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR8,CTR8,
+ CR0,CR1,CR5,CR6,CR7] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL8_ELF : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA8_ELF : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_ELF (i64 imm:$func))]>;
+ }
+ let Uses = [CTR8, RM] in {
+ def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_ELF)]>, Requires<[In64BitMode]>;
+ }
+}
+
+
+// Calls
+def : Pat<(PPCcall_Macho (i64 tglobaladdr:$dst)),
+ (BL8_Macho tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Macho (i64 texternalsym:$dst)),
+ (BL8_Macho texternalsym:$dst)>;
+
+def : Pat<(PPCcall_ELF (i64 tglobaladdr:$dst)),
+ (BL8_ELF tglobaladdr:$dst)>;
+def : Pat<(PPCcall_ELF (i64 texternalsym:$dst)),
+ (BL8_ELF texternalsym:$dst)>;
+
+// Atomic operations
+let usesCustomDAGSchedInserter = 1 in {
+ let Uses = [CR0] in {
+ def ATOMIC_LOAD_ADD_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+ "${:comment} ATOMIC_LOAD_ADD_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+ "${:comment} ATOMIC_LOAD_SUB_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_OR_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+ "${:comment} ATOMIC_LOAD_OR_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+ "${:comment} ATOMIC_LOAD_XOR_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_AND_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+ "${:comment} ATOMIC_LOAD_AND_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr),
+ "${:comment} ATOMIC_LOAD_NAND_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new),
+ "${:comment} ATOMIC_CMP_SWAP_I64 PSEUDO!",
+ [(set G8RC:$dst,
+ (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>;
+
+ def ATOMIC_SWAP_I64 : Pseudo<
+ (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new),
+ "${:comment} ATOMIC_SWAP_I64 PSEUDO!",
+ [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>;
+ }
+}
+
+// Instructions to support atomic operations
+def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr", LdStLDARX,
+ [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>;
+
+let Defs = [CR0] in
+def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdcx. $rS, $dst", LdStSTDCX,
+ [(PPCstcx G8RC:$rS, xoaddr:$dst)]>,
+ isDOT;
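
Illustrative C (not from this patch): a 64-bit atomic read-modify-write such as the one below typically ends up as the atomic_load_add_64 node matched by the ATOMIC_LOAD_ADD_I64 pseudo above, which the custom inserter then expands into an ldarx/add/stdcx. retry loop built from the LDARX and STDCX definitions; the function name is made up, and __sync_fetch_and_add is the GCC builtin.

long long fetch_add64(long long *p, long long v) {
  return __sync_fetch_and_add(p, v);
}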
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi8 :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNd8 $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+ "#TC_RETURNa8 $func $offset",
+ [(PPCtc_return (i64 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNr8 $dst $offset",
+ []>;
+
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
+def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In64BitMode]>;
+
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", BrB,
+ []>;
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+ "ba $dst", BrB,
+ []>;
+
+def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi8 texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
+ (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit SPR manipulation instrs.
+
+let Uses = [CTR8] in {
+def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins),
+ "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in {
+def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [X1], Uses = [X1] in
+def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),
+ "${:comment} DYNALLOC8 $result, $negsize, $fpsi",
+ [(set G8RC:$result,
+ (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>;
+
+let Defs = [LR8] in {
+def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS),
+ "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR8] in {
+def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins),
+ "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed point instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// Copies, extends, truncates.
+def OR4To8 : XForm_6<31, 444, (outs G8RC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ []>;
+def OR8To4 : XForm_6<31, 444, (outs GPRC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ []>;
+
+def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
+ "li $rD, $imm", IntGeneral,
+ [(set G8RC:$rD, immSExt16:$imm)]>;
+def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
+ "lis $rD, $imm", IntGeneral,
+ [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>;
+
+// Logical ops.
+def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "nand $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>;
+def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "and $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>;
+def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "andc $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>;
+def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>;
+def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "nor $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>;
+def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "orc $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>;
+def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "eqv $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>;
+def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
+ "xor $rA, $rS, $rB", IntGeneral,
+ [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>;
+
+// Logical ops with immediate.
+def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>;
+def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>;
+def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntGeneral,
+ [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>;
+
+def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "add $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+
+def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>,
+ PPC970_DGroup_Cracked;
+def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
+
+def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "addi $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
+ "addis $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+
+def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>;
+def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>;
+def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>,
+ PPC970_DGroup_Cracked;
+
+def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
+def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, immAllOnes))]>;
+def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
+def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (ineg G8RC:$rA))]>;
+def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (sube immAllOnes, G8RC:$rA))]>;
+def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
+
+
+
+def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulhd $rT, $rA, $rB", IntMulHW,
+ [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>;
+def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulhdu $rT, $rA, $rB", IntMulHWU,
+ [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>;
+
+def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+ "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPLD : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
+ "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
+def CMPDI : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm),
+ "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
+def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
+ "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
+
+def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "sld $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "srd $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64;
+def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
+ "srad $rA, $rS, $rB", IntRotateD,
+ [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64;
+
+def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsb $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>;
+def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsh $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>;
+
+def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
+/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
+def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
+def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64;
+
+def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
+ "sradi $rA, $rS, $SH", IntRotateD,
+ [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64;
+def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
+ "cntlzd $rA, $rS", IntGeneral,
+ [(set G8RC:$rA, (ctlz G8RC:$rS))]>;
+
+def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "divd $rT, $rA, $rB", IntDivD,
+ [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "divdu $rT, $rA, $rB", IntDivD,
+ [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
+ "mulld $rT, $rA, $rB", IntMulHD,
+ [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64;
+
+
+let isCommutable = 1 in {
+def RLDIMI : MDForm_1<30, 3,
+ (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
+ "rldimi $rA, $rS, $SH, $MB", IntRotateD,
+ []>, isPPC64, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+
+// Rotate instructions.
+def RLDCL : MDForm_1<30, 0,
+ (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MB),
+ "rldcl $rA, $rS, $rB, $MB", IntRotateD,
+ []>, isPPC64;
+def RLDICL : MDForm_1<30, 0,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB),
+ "rldicl $rA, $rS, $SH, $MB", IntRotateD,
+ []>, isPPC64;
+def RLDICR : MDForm_1<30, 1,
+ (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
+ "rldicr $rA, $rS, $SH, $ME", IntRotateD,
+ []>, isPPC64;
+} // End FXU Operations.
+
+
+//===----------------------------------------------------------------------===//
+// Load/Store instructions.
+//
+
+
+// Sign extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
+ "lwa $rD, $src", LdStLWA,
+ [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
+ "lwax $rD, $src", LdStLHA,
+ [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+
+// Update forms.
+let mayLoad = 1 in
+def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
+ ptr_rc:$rA),
+ "lhau $rD, $disp($rA)", LdStGeneral,
+ []>, RegConstraint<"$rA = $ea_result">,
+ NoEncode<"$ea_result">;
+// NO LWAU!
+
+}
+
+// Zero extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
+ "lbz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>;
+def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
+ "lhz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
+ "lwz $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+
+def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>;
+def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", LdStGeneral,
+ [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>;
+
+
+// Update forms.
+let mayLoad = 1 in {
+def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+
+// Full 8-byte loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
+ "ld $rD, $src", LdStLD,
+ [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
+ "ldx $rD, $src", LdStLD,
+ [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
+
+let mayLoad = 1 in
+def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
+ "ldu $rD, $addr", LdStLD,
+ []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
+ NoEncode<"$ea_result">;
+
+}
+
+let PPC970_Unit = 2 in {
+// Truncating stores.
+def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
+ "stb $rS, $src", LdStGeneral,
+ [(truncstorei8 G8RC:$rS, iaddr:$src)]>;
+def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
+ "sth $rS, $src", LdStGeneral,
+ [(truncstorei16 G8RC:$rS, iaddr:$src)]>;
+def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
+ "stw $rS, $src", LdStGeneral,
+ [(truncstorei32 G8RC:$rS, iaddr:$src)]>;
+def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStGeneral,
+ [(truncstorei8 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStGeneral,
+ [(truncstorei16 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStGeneral,
+ [(truncstorei32 G8RC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+// Normal 8-byte stores.
+def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
+ "std $rS, $dst", LdStSTD,
+ [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdx $rS, $dst", LdStSTD,
+ [(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+let PPC970_Unit = 2 in {
+
+def STBU8 : DForm_1<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+
+
+def STDU : DSForm_1<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
+ s16immX4:$ptroff, ptr_rc:$ptrreg),
+ "stdu $rS, $ptroff($ptrreg)", LdStSTD,
+ [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+let mayStore = 1 in
+def STDUX : XForm_8<31, 181, (outs), (ins G8RC:$rS, memrr:$dst),
+ "stdux $rS, $dst", LdStSTD,
+ []>, isPPC64;
+
+// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
+def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
+ "std $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64;
+def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
+ "stdx $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Floating point instructions.
+//
+
+
+let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations.
+def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fcfid $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64;
+def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctidz $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Patterns
+//
+
+// Extensions and truncates to/from 32-bit regs.
+def : Pat<(i64 (zext GPRC:$in)),
+ (RLDICL (OR4To8 GPRC:$in, GPRC:$in), 0, 32)>;
+def : Pat<(i64 (anyext GPRC:$in)),
+ (OR4To8 GPRC:$in, GPRC:$in)>;
+def : Pat<(i32 (trunc G8RC:$in)),
+ (OR8To4 G8RC:$in, G8RC:$in)>;
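+// (OR4To8/OR8To4 are plain "or rT, rS, rS" copies whose only purpose is to
+// move a value between the 32-bit and 64-bit register classes; the RLDICL
+// with SH = 0, MB = 32 then clears the upper 32 bits to complete the zext.)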
+
+// Extending loads with i64 targets.
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ8 iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX8 xaddr:$src)>;
+def : Pat<(extloadi32 iaddr:$src),
+ (LWZ8 iaddr:$src)>;
+def : Pat<(extloadi32 xaddr:$src),
+ (LWZX8 xaddr:$src)>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 6-bit and 7-bit shift
+// amounts.
+def : Pat<(sra G8RC:$rS, GPRC:$rB),
+ (SRAD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(srl G8RC:$rS, GPRC:$rB),
+ (SRD G8RC:$rS, GPRC:$rB)>;
+def : Pat<(shl G8RC:$rS, GPRC:$rB),
+ (SLD G8RC:$rS, GPRC:$rB)>;
+
+// SHL/SRL
+def : Pat<(shl G8RC:$in, (i32 imm:$imm)),
+ (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl G8RC:$in, (i32 imm:$imm)),
+ (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>;
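+// (Illustrative example, assuming the SHL64/SRL64 xforms compute 63-n and
+// (64-n)&63 respectively: (shl x, 8) selects to an RLDICR with SH = 8,
+// ME = 55, and (srl x, 8) to an RLDICL with SH = 56, MB = 8.)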
+
+// ROTL
+def : Pat<(rotl G8RC:$in, GPRC:$sh),
+ (RLDCL G8RC:$in, GPRC:$sh, 0)>;
+def : Pat<(rotl G8RC:$in, (i32 imm:$imm)),
+ (RLDICL G8RC:$in, imm:$imm, 0)>;
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI8 tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in , 0), (LIS8 tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in , 0), (LI8 tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
+def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS8 G8RC:$in, tglobaladdr:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS8 G8RC:$in, tconstpool:$g)>;
+def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS8 G8RC:$in, tjumptable:$g)>;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
new file mode 100644
index 000000000000..9a5be79e816f
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -0,0 +1,668 @@
+//===- PPCInstrAltivec.td - The PowerPC Altivec Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Altivec extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Altivec transformation functions and pattern fragments.
+//
+
+
+def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+}]>;
+def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+}]>;
+
+
+def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+}]>;
+def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+}]>;
+def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+}]>;
+
+
+def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
+def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+}]>;
+def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+}]>;
+def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+}]>;
+
+
+def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
+}]>;
+def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, false) != -1;
+}], VSLDOI_get_imm>;
+
+
+/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
+/// vector_shuffle(X,undef,mask) by the dag combiner.
+def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
+}]>;
+def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, true) != -1;
+}], VSLDOI_unary_get_imm>;
+
+
+// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
+def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1));
+}]>;
+def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
+}], VSPLTB_get_imm>;
+def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2));
+}]>;
+def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
+}], VSPLTH_get_imm>;
+def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4));
+}]>;
+def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);
+}], VSPLTW_get_imm>;
+
+
+// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm.
+def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG);
+}]>;
+def vecspltisb : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG).getNode() != 0;
+}], VSPLTISB_get_imm>;
+
+// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm.
+def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG);
+}]>;
+def vecspltish : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG).getNode() != 0;
+}], VSPLTISH_get_imm>;
+
+// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm.
+def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG);
+}]>;
+def vecspltisw : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
+}], VSPLTISW_get_imm>;
+
+def V_immneg0 : PatLeaf<(build_vector), [{
+ return PPC::isAllNegativeZeroVector(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// VA1a_Int - A VAForm_1a intrinsic definition.
+class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID>
+ : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>;
+
+// VX1_Int - A VXForm_1 intrinsic definition.
+class VX1_Int<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>;
+
+// VX2_Int - A VXForm_2 intrinsic definition.
+class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ !strconcat(opc, " $vD, $vB"), VecFP,
+ [(set VRRC:$vD, (IntID VRRC:$vB))]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def DSS : DSS_Form<822, (outs),
+ (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dss $STRM", LdStGeneral /*FIXME*/, []>;
+def DSSALL : DSS_Form<822, (outs),
+ (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dssall", LdStGeneral /*FIXME*/, []>;
+def DST : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTT : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTST : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTSTT : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+
+def DST64 : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTT64 : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTST64 : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+def DSTSTT64 : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+
+def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
+ "mfvscr $vD", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>;
+def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
+ "mtvscr $vB", LdStGeneral,
+ [(int_ppc_altivec_mtvscr VRRC:$vB)]>;
+
+let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
+def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src),
+ "lvebx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src),
+ "lvehx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src),
+ "lvewx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
+ "lvx $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
+ "lvxl $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+}
+
+def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src),
+ "lvsl $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src),
+ "lvsr $vD, $src", LdStGeneral,
+ [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+
+let PPC970_Unit = 2 in { // Stores.
+def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvebx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>;
+def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvehx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>;
+def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvewx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>;
+def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvx $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>;
+def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
+ "stvxl $rS, $dst", LdStGeneral,
+ [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>;
+}
+
+let PPC970_Unit = 5 in { // VALU Operations.
+// VA-Form instructions. 3-input AltiVec ops.
+def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vmaddfp $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
+ VRRC:$vB))]>,
+ Requires<[FPContractions]>;
+def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+ "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD, (fsub V_immneg0,
+ (fsub (fmul VRRC:$vA, VRRC:$vC),
+ VRRC:$vB)))]>,
+ Requires<[FPContractions]>;
+
+def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
+def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
+def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>;
+def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>;
+def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>;
+
+// Shuffles.
+def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
+ "vsldoi $vD, $vA, $vB, $SH", VecFP,
+ [(set VRRC:$vD,
+ (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>;
+
+// VX-Form instructions. AltiVec arithmetic ops.
+def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vaddfp $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>;
+
+def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vaddubm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vadduhm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>;
+def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vadduwm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>;
+def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>;
+def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>;
+def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>;
+def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>;
+def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>;
+def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>;
+
+
+def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vand $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>;
+def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vandc $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA), (vnot VRRC:$vB)))]>;
+
+def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vcfsx $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>;
+def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vcfux $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>;
+def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vctsxs $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>;
+def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vctuxs $vD, $vB, $UIMM", VecFP,
+ [(set VRRC:$vD,
+ (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>;
+def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>;
+
+def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>;
+def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>;
+def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>;
+def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>;
+def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>;
+def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>;
+
+def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>;
+def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>;
+def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>;
+def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>;
+def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>;
+def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>;
+def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>;
+def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>;
+def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>;
+def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>;
+def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>;
+def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>;
+def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>;
+def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>;
+
+def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghb $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghh $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrghw $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglb $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglh $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>;
+def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vmrglw $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>;
+
+def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>;
+def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>;
+def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>;
+def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>;
+def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>;
+def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>;
+
+def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>;
+def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>;
+def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>;
+def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>;
+def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>;
+def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>;
+def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>;
+def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>;
+
+def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>;
+def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>;
+def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>;
+def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>;
+def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+
+def VSUBCUW : VX1_Int<1408, "vsubcuw", int_ppc_altivec_vsubcuw>;
+
+def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubfp $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>;
+def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsububm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubuhm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>;
+def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vsubuwm $vD, $vA, $vB", VecGeneral,
+ [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>;
+def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>;
+def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>;
+def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>;
+def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>;
+def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>;
+def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>;
+def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>;
+def VSUM4SBS: VX1_Int<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs>;
+def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>;
+def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>;
+
+def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vnor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (vnot (or (v4i32 VRRC:$vA), VRRC:$vB)))]>;
+def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>;
+def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vxor $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>;
+
+def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>;
+def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>;
+def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>;
+
+def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >;
+def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>;
+def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>;
+def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>;
+def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>;
+
+def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vspltb $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vsplth $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+ "vspltw $vD, $vB, $UIMM", VecPerm,
+ [(set VRRC:$vD,
+ (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>;
+
+def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>;
+def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>;
+def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>;
+def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>;
+def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>;
+def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>;
+def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>;
+def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>;
+
+
+def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltisb $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>;
+def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltish $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>;
+def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM),
+ "vspltisw $vD, $SIMM", VecPerm,
+ [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>;
+
+// Vector Pack.
+def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>;
+def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>;
+def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>;
+def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>;
+def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>;
+def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vpkuhum $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD,
+ (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>;
+def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ "vpkuwum $vD, $vA, $vB", VecFP,
+ [(set VRRC:$vD,
+ (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>;
+def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>;
+
+// Vector Unpack.
+def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>;
+def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>;
+def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>;
+def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>;
+def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>;
+def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>;
+
+
+// Altivec Comparisons.
+
+class VCMP<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>;
+class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> {
+ let Defs = [CR6];
+ let RC = 1;
+}
+
+// f32 element comparisons.
+def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
+def VCMPBFPo : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
+def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
+def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
+def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+
+// i8 element comparisons.
+def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
+def VCMPEQUBo : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
+def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
+def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+
+// i16 element comparisons.
+def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
+def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
+def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
+def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+
+// i32 element comparisons.
+def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
+def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
+def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
+def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+
+def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
+ "vxor $vD, $vD, $vD", VecFP,
+ [(set VRRC:$vD, (v4i32 immAllZerosV))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Additional Altivec Patterns
+//
+
+// DS* intrinsics
+def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
+def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
+
+// * 32-bit
+def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;
+
+// * 64-bit
+def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM),
+ (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>;
+
+// Loads.
+def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
+ (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
+
+// Bit conversions.
+def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
+
+// Shuffles.
+
+// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
+def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef),
+ (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>;
+def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VPKUWUM VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VPKUHUM VRRC:$vA, VRRC:$vA)>;
+
+// Match vmrg*(x,x)
+def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGLB VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGLH VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGLW VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGHB VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGHH VRRC:$vA, VRRC:$vA)>;
+def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
+ (VMRGHW VRRC:$vA, VRRC:$vA)>;
+
+// Logical Operations
+def : Pat<(v4i32 (vnot VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+def : Pat<(v4i32 (vnot_conv VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+
+def : Pat<(v4i32 (vnot_conv (or VRRC:$A, VRRC:$B))),
+ (VNOR VRRC:$A, VRRC:$B)>;
+def : Pat<(v4i32 (and VRRC:$A, (vnot_conv VRRC:$B))),
+ (VANDC VRRC:$A, VRRC:$B)>;
+
+def : Pat<(fmul VRRC:$vA, VRRC:$vB),
+ (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>;
+
+// Fused multiply add and multiply sub for packed float. These are represented
+// separately from the real instructions above, for operations that must have
+// the additional precision, such as Newton-Raphson (used by divide, sqrt).
+def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
+ (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>;
+
+def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
+ (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>;
diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h
new file mode 100644
index 000000000000..1de69116cd58
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -0,0 +1,43 @@
+//===-- PPCInstrBuilder.h - Aids for building PPC insts ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_INSTRBUILDER_H
+#define POWERPC_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference has base register as the FrameIndex offset until it is resolved.
+/// This allows a constant offset to be specified as well...
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
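+
+// Illustrative use (a sketch only; PPC::LWZ, DL, MBB, MI, DestReg and FrameIdx
+// are assumed to be in scope).  With the default mem = true the offset is
+// added before the frame index, as the body above shows:
+//
+//   addFrameReference(BuildMI(MBB, MI, DL, TII.get(PPC::LWZ), DestReg),
+//                     FrameIdx);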
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
new file mode 100644
index 000000000000..54cebcdecd61
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -0,0 +1,875 @@
+//===- PPCInstrFormats.td - PowerPC Instruction Formats ------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// PowerPC instruction formats
+
+class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+}
+
+class PPC970_DGroup_First { bits<1> PPC970_First = 1; }
+class PPC970_DGroup_Single { bits<1> PPC970_Single = 1; }
+class PPC970_DGroup_Cracked { bits<1> PPC970_Cracked = 1; }
+class PPC970_MicroCode;
+
+class PPC970_Unit_Pseudo { bits<3> PPC970_Unit = 0; }
+class PPC970_Unit_FXU { bits<3> PPC970_Unit = 1; }
+class PPC970_Unit_LSU { bits<3> PPC970_Unit = 2; }
+class PPC970_Unit_FPU { bits<3> PPC970_Unit = 3; }
+class PPC970_Unit_CRU { bits<3> PPC970_Unit = 4; }
+class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
+class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
+class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
+
+
+// 1.7.1 I-Form
+class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+// 1.7.2 B-Form
+class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+ bits<14> BD;
+
+ bits<5> BI;
+ let BI{0-1} = BIBO{5-6};
+ let BI{2-4} = CR{0-2};
+
+ let Inst{6-10} = BIBO{4-0};
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+
+// 1.7.4 D-Form
+class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> C;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>;
+
+class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = 0;
+ let Inst{16-31} = B;
+}
+
+class DForm_4<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> B;
+ bits<5> A;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let B = 0;
+ let C = 0;
+}
+
+class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<16> I;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-31} = I;
+}
+
+class DForm_5_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class DForm_6<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin>;
+
+class DForm_6_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_6<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+
+// 1.7.5 DS-Form
+class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<14> DS;
+ bits<5> RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = RA;
+ let Inst{16-29} = DS;
+ let Inst{30-31} = xo;
+}
+
+// 1.7.6 X-Form
+class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// This is the same as XForm_base_r3xo, but the first two operands are swapped
+// when code is emitted.
+class XForm_base_r3xo_swapped
+ <bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RST;
+ bits<5> B;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+
+class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_8<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_10<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_11<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let B = 0;
+ let Pattern = pattern;
+}
+
+class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XForm_16<opcode, xo, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<5> FRA;
+ bits<5> FRB;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 31;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24_sync<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_25<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+}
+
+class XForm_28<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// This is used for MFFS, MTFSB0, MTFSB1. 42 is arbitrary; this series of
+// numbers presumably relates to some document, but I haven't found it.
+class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+ bits<5> FM;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FM;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// DCB_Form - Form X instruction, used for dcb* instructions.
+class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = immfield;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+
+// DSS_Form - Form X instruction, used for altivec dss* instructions.
+class DSS_Form<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<1> T;
+ bits<2> STRM;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6} = T;
+ let Inst{7-8} = 0;
+ let Inst{9-10} = STRM;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.7 XL-Form
+class XLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+ bits<5> CRA;
+ bits<5> CRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRA;
+ let Inst{16-20} = CRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_1_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRD;
+ let Inst{16-20} = CRD;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_2<bits<6> opcode, bits<10> xo, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<2> BH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-18} = 0;
+ let Inst{19-20} = BH;
+ let Inst{21-30} = xo;
+ let Inst{31} = lk;
+}
+
+class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+
+ let BO = BIBO{2-6};
+ let BI{0-1} = BIBO{0-1};
+ let BI{2-4} = CR;
+ let BH = 0;
+}
+
+
+class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BI = bi;
+ let BH = 0;
+}
+
+class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<3> BFA;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-13} = BFA;
+ let Inst{14-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.8 XFX-Form
+class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<10> SPR;
+
+ let Inst{6-10} = RT;
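+ // The 10-bit SPR number is encoded with its two 5-bit halves swapped:
+ // SPR{4-0} goes into Inst{11-15} and SPR{9-5} into Inst{16-20}, per the
+ // mfspr/mtspr instruction encoding.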
+ let Inst{11} = SPR{4};
+ let Inst{12} = SPR{3};
+ let Inst{13} = SPR{2};
+ let Inst{14} = SPR{1};
+ let Inst{15} = SPR{0};
+ let Inst{16} = SPR{9};
+ let Inst{17} = SPR{8};
+ let Inst{18} = SPR{7};
+ let Inst{19} = SPR{6};
+ let Inst{20} = SPR{5};
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_1_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+class XFXForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+
+ let Inst{6-10} = RT;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FXM;
+ bits<5> ST;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 0;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> ST;
+ bits<8> FXM;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 1;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_7<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin>;
+
+class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_7<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+// XFL-Form - MTFSF
+// This is probably 1.7.9, but I don't have the reference that uses this
+// numbering scheme...
+class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ string cstr, InstrItinClass itin, list<dag>pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FM;
+ bits<5> RT;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+ let Constraints = cstr;
+
+ let Inst{6} = 0;
+ let Inst{7-14} = FM;
+ let Inst{15} = 0;
+ let Inst{16-20} = RT;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// 1.7.10 XS-Form - SRADI.
+class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RS;
+ bits<6> SH;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = A;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+// 1.7.11 XO-Form
+class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21} = oe;
+ let Inst{22-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XOForm_3<bits<6> opcode, bits<9> xo, bit oe,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XOForm_1<opcode, xo, oe, OOL, IOL, asmstr, itin, pattern> {
+ let RB = 0;
+}
+
+// 1.7.12 A-Form
+class AForm_1<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRC;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-25} = FRC;
+ let Inst{26-30} = xo;
+ let Inst{31} = RC;
+}
+
+class AForm_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRC = 0;
+}
+
+class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+// 1.7.13 M-Form
+class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<5> RB;
+ bits<5> MB;
+ bits<5> ME;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = MB;
+ let Inst{26-30} = ME;
+ let Inst{31} = RC;
+}
+
+class MForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : MForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// 1.7.14 MD-Form
+class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<6> SH;
+ bits<6> MBE;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-26} = MBE{4,3,2,1,0,5};
+ let Inst{27-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+
+
+// E-1 VA-Form
+
+// VAForm_1 - DACB ordering.
+class VAForm_1<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VC;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+// VAForm_1a - DABC ordering.
+class VAForm_1a<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<5> VC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+class VAForm_2<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<4> SH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = 0;
+ let Inst{22-25} = SH;
+ let Inst{26-31} = xo;
+}
+
+// E-2 VX-Form
+class VXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_setzero<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : VXForm_1<xo, OOL, IOL, asmstr, itin, pattern> {
+ let VA = VD;
+ let VB = VD;
+}
+
+
+class VXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> IMM;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = IMM;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_4 - VX instructions with "VD,0,0" register fields, like mfvscr.
+class VXForm_4<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_5 - VX instructions with "0,0,VB" register fields, like mtvscr.
+class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+// E-4 VXR-Form
+class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bit RC = 0;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = RC;
+ let Inst{22-31} = xo;
+}
+
+//===----------------------------------------------------------------------===//
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : I<0, OOL, IOL, asmstr, NoItinerary> {
+ let PPC64 = 0;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
new file mode 100644
index 000000000000..778f0349d10f
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -0,0 +1,818 @@
+//===- PPCInstrInfo.cpp - PowerPC32 Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPredicates.h"
+#include "PPCGenInstrInfo.inc"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+using namespace llvm;
+
+extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+
+PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
+ : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm),
+ RI(*TM.getSubtargetImpl(), *this) {}
+
+bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned& sourceReg,
+ unsigned& destReg,
+ unsigned& sourceSubIdx,
+ unsigned& destSubIdx) const {
+ sourceSubIdx = destSubIdx = 0; // No sub-registers.
+
+ unsigned oc = MI.getOpcode();
+ if (oc == PPC::OR || oc == PPC::OR8 || oc == PPC::VOR ||
+ oc == PPC::OR4To8 || oc == PPC::OR8To4) { // or r1, r2, r2
+ assert(MI.getNumOperands() >= 3 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isReg() &&
+ "invalid PPC OR instruction!");
+ if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ } else if (oc == PPC::ADDI) { // addi r1, r2, 0
+ assert(MI.getNumOperands() >= 3 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(2).isImm() &&
+ "invalid PPC ADDI instruction!");
+ if (MI.getOperand(1).isReg() && MI.getOperand(2).getImm() == 0) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ } else if (oc == PPC::ORI) { // ori r1, r2, 0
+ assert(MI.getNumOperands() >= 3 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ MI.getOperand(2).isImm() &&
+ "invalid PPC ORI instruction!");
+ if (MI.getOperand(2).getImm() == 0) {
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ } else if (oc == PPC::FMRS || oc == PPC::FMRD ||
+ oc == PPC::FMRSD) { // fmr r1, r2
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "invalid PPC FMR instruction");
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ } else if (oc == PPC::MCRF) { // mcrf cr1, cr2
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "invalid PPC MCRF instruction");
+ sourceReg = MI.getOperand(1).getReg();
+ destReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ return false;
+}
+
+unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::LD:
+ case PPC::LWZ:
+ case PPC::LFS:
+ case PPC::LFD:
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::STD:
+ case PPC::STW:
+ case PPC::STFS:
+ case PPC::STFD:
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+// commuteInstruction - We can commute rlwimi instructions, but only if the
+// rotate amt is zero. We also have to munge the immediates a bit.
+MachineInstr *
+PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+
+ // Normal instructions can be commuted the obvious way.
+ if (MI->getOpcode() != PPC::RLWIMI)
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+
+ // Cannot commute if it has a non-zero rotate count.
+ if (MI->getOperand(3).getImm() != 0)
+ return 0;
+
+ // If we have a zero rotate count, we have:
+ // M = mask(MB,ME)
+ // Op0 = (Op1 & ~M) | (Op2 & M)
+ // Change this to:
+ // M = mask((ME+1)&31, (MB-1)&31)
+ // Op0 = (Op2 & ~M) | (Op1 & M)
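+ //
+ // For example, with MB=24, ME=31 the mask M covers the low byte (0x000000FF,
+ // PPC bit 0 being the most significant bit); the commuted instruction uses
+ // MB=0, ME=23, i.e. mask 0xFFFFFF00 = ~M, so swapping the two source
+ // operands preserves the result.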
+
+ // Swap op1/op2
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ bool Reg1IsKill = MI->getOperand(1).isKill();
+ bool Reg2IsKill = MI->getOperand(2).isKill();
+ bool ChangeReg0 = false;
+ // If machine instrs are no longer in two-address forms, update
+ // destination register as well.
+ if (Reg0 == Reg1) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ // Masks.
+ unsigned MB = MI->getOperand(4).getImm();
+ unsigned ME = MI->getOperand(5).getImm();
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
+ bool Reg0IsDead = MI->getOperand(0).isDead();
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill))
+ .addImm((ME+1) & 31)
+ .addImm((MB-1) & 31);
+ }
+
+ if (ChangeReg0)
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(2).setReg(Reg1);
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setIsKill(Reg1IsKill);
+ MI->getOperand(1).setIsKill(Reg2IsKill);
+
+ // Swap the mask around.
+ MI->getOperand(4).setImm((ME+1) & 31);
+ MI->getOperand(5).setImm((MB-1) & 31);
+ return MI;
+}
+
+void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ BuildMI(MBB, MI, DL, get(PPC::NOP));
+}
+
+
+// Branch analysis.
+bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == PPC::B) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BCC) {
+ if (!LastInst->getOperand(2).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a PPC::BCC followed by a PPC::B, handle it.
+ if (SecondLastInst->getOpcode() == PPC::BCC &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(2).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two PPC::B branches, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == PPC::B &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != PPC::BCC)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME this should probably have a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "PPC branch conditions have two components!");
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, dl, get(PPC::B)).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, dl, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ BuildMI(&MBB, dl, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, dl, get(PPC::B)).addMBB(FBB);
+ return 2;
+}
+
+bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ if (DestRC != SrcRC) {
+ // Not yet supported!
+ return false;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ if (DestRC == PPC::GPRCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else if (DestRC == PPC::G8RCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else if (DestRC == PPC::F4RCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::FMRS), DestReg).addReg(SrcReg);
+ } else if (DestRC == PPC::F8RCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::FMRD), DestReg).addReg(SrcReg);
+ } else if (DestRC == PPC::CRRCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::MCRF), DestReg).addReg(SrcReg);
+ } else if (DestRC == PPC::VRRCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::VOR), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else if (DestRC == PPC::CRBITRCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::CROR), DestReg).addReg(SrcReg).addReg(SrcReg);
+ } else {
+ // Attempt to copy register that is not GPR or FPR
+ return false;
+ }
+
+ return true;
+}
+
+bool
+PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (RC == PPC::GPRCRegisterClass) {
+ if (SrcReg != PPC::LR) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else {
+ // FIXME: this spills LR immediately to memory in one step. To do this,
+ // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // a hack.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(PPC::R11,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (RC == PPC::G8RCRegisterClass) {
+ if (SrcReg != PPC::LR8) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else {
+ // FIXME: this spills LR immediately to memory in one step. To do this,
+ // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // a hack.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(PPC::X11,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (RC == PPC::F8RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (RC == PPC::F4RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (RC == PPC::CRRCRegisterClass) {
+ if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
+ // FIXME (64-bit): Enable
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
+ } else {
+ // FIXME: We use R0 here, because it isn't available for RA. We need to
+ // store the CR in the low 4-bits of the saved value. First, issue a MFCR
+ // to save all of the CRBits.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), PPC::R0));
+
+ // If the saved register wasn't CR0, shift the bits left so that they are
+ // in CR0's slot.
+ if (SrcReg != PPC::CR0) {
+ unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4;
+ // rlwinm r0, r0, ShiftBits, 0, 31.
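+ // For example, spilling CR2 gives ShiftBits = 8: MFCR places CR2's four bits
+ // at positions 8-11, and rotating left by 8 moves them into CR0's slot
+ // (bits 0-3) before the word is stored.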
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R0).addImm(ShiftBits).addImm(0).addImm(31));
+ }
+
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(PPC::R0,
+ getKillRegState(isKill)),
+ FrameIdx));
+ }
+ } else if (RC == PPC::CRBITRCRegisterClass) {
+ // FIXME: We use CRi here because there is no mtcrf on a bit. Since the
+ // backend currently only uses CR1EQ as an individual bit, this should
+ // not cause any bug. If we need other uses of CR bits, the following
+ // code may be invalid.
+ unsigned Reg = 0;
+ if (SrcReg >= PPC::CR0LT || SrcReg <= PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg >= PPC::CR1LT || SrcReg <= PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg >= PPC::CR2LT || SrcReg <= PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg >= PPC::CR3LT || SrcReg <= PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg >= PPC::CR4LT || SrcReg <= PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg >= PPC::CR5LT || SrcReg <= PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg >= PPC::CR6LT || SrcReg <= PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg >= PPC::CR7LT || SrcReg <= PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
+ PPC::CRRCRegisterClass, NewMIs);
+
+ } else if (RC == PPC::VRRCRegisterClass) {
+ // We only have indexed (reg+reg) addressing for vector stores. Emit:
+ // R0 = ADDI FI#
+ // STVX VAL, 0, R0
+ //
+ // FIXME: We use R0 here, because it isn't available for RA.
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ FrameIdx, 0, 0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addReg(PPC::R0)
+ .addReg(PPC::R0));
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+
+ return false;
+}
+
+void
+PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setSpillsCR();
+ }
+
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+}
+
+void PPCInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ if (Addr[0].isFI()) {
+ if (StoreRegToStackSlot(MF, SrcReg, isKill,
+ Addr[0].getIndex(), RC, NewMIs)) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setSpillsCR();
+ }
+
+ return;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ unsigned Opc = 0;
+ if (RC == PPC::GPRCRegisterClass) {
+ Opc = PPC::STW;
+ } else if (RC == PPC::G8RCRegisterClass) {
+ Opc = PPC::STD;
+ } else if (RC == PPC::F8RCRegisterClass) {
+ Opc = PPC::STFD;
+ } else if (RC == PPC::F4RCRegisterClass) {
+ Opc = PPC::STFS;
+ } else if (RC == PPC::VRRCRegisterClass) {
+ Opc = PPC::STVX;
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc))
+ .addReg(SrcReg, getKillRegState(isKill));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+void
+PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ if (RC == PPC::GPRCRegisterClass) {
+ if (DestReg != PPC::LR) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ DestReg), FrameIdx));
+ } else {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ PPC::R11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11));
+ }
+ } else if (RC == PPC::G8RCRegisterClass) {
+ if (DestReg != PPC::LR8) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+ FrameIdx));
+ } else {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
+ PPC::R11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11));
+ }
+ } else if (RC == PPC::F8RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
+ FrameIdx));
+ } else if (RC == PPC::F4RCRegisterClass) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
+ FrameIdx));
+ } else if (RC == PPC::CRRCRegisterClass) {
+ // FIXME: We use R0 here, because it isn't available for RA.
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), PPC::R0),
+ FrameIdx));
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4;
+ // rlwinm r0, r0, 32-ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R0).addImm(32-ShiftBits).addImm(0).addImm(31));
+ }
+
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg).addReg(PPC::R0));
+ } else if (RC == PPC::CRBITRCRegisterClass) {
+
+ unsigned Reg = 0;
+ if (DestReg >= PPC::CR0LT || DestReg <= PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (DestReg >= PPC::CR1LT || DestReg <= PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (DestReg >= PPC::CR2LT || DestReg <= PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (DestReg >= PPC::CR3LT || DestReg <= PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (DestReg >= PPC::CR4LT || DestReg <= PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (DestReg >= PPC::CR5LT || DestReg <= PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (DestReg >= PPC::CR6LT || DestReg <= PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (DestReg >= PPC::CR7LT || DestReg <= PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
+ PPC::CRRCRegisterClass, NewMIs);
+
+ } else if (RC == PPC::VRRCRegisterClass) {
+ // We only have indexed (reg+reg) addressing for vector loads. Emit:
+ // R0 = ADDI FI#
+ // Dest = LVX 0, R0
+ //
+ // FIXME: We use R0 here, because it isn't available for RA.
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0),
+ FrameIdx, 0, 0));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0)
+ .addReg(PPC::R0));
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+}
+
+void
+PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+}
+
+void PPCInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ if (Addr[0].isFI()) {
+ LoadRegFromStackSlot(MF, DebugLoc::getUnknownLoc(),
+ DestReg, Addr[0].getIndex(), RC, NewMIs);
+ return;
+ }
+
+ unsigned Opc = 0;
+ if (RC == PPC::GPRCRegisterClass) {
+ assert(DestReg != PPC::LR && "Can't handle this yet!");
+ Opc = PPC::LWZ;
+ } else if (RC == PPC::G8RCRegisterClass) {
+ assert(DestReg != PPC::LR8 && "Can't handle this yet!");
+ Opc = PPC::LD;
+ } else if (RC == PPC::F8RCRegisterClass) {
+ Opc = PPC::LFD;
+ } else if (RC == PPC::F4RCRegisterClass) {
+ Opc = PPC::LFS;
+ } else if (RC == PPC::VRRCRegisterClass) {
+ Opc = PPC::LVX;
+ } else {
+ assert(0 && "Unknown regclass!");
+ abort();
+ }
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+/// foldMemoryOperand - PowerPC (like most RISCs) can only fold spills into
+/// copy instructions, turning them into load/store instructions.
+MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ if (Ops.size() != 1) return NULL;
+
+ // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because
+ // it takes more than one instruction to store it.
+ unsigned Opc = MI->getOpcode();
+ unsigned OpNum = Ops[0];
+
+ MachineInstr *NewMI = NULL;
+ if ((Opc == PPC::OR &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STW))
+ .addReg(InReg, getKillRegState(isKill)),
+ FrameIndex);
+ } else { // move -> load
+ unsigned OutReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LWZ))
+ .addReg(OutReg,
+ RegState::Define |
+ getDeadRegState(isDead)),
+ FrameIndex);
+ }
+ } else if ((Opc == PPC::OR8 &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STD))
+ .addReg(InReg, getKillRegState(isKill)),
+ FrameIndex);
+ } else { // move -> load
+ unsigned OutReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LD))
+ .addReg(OutReg,
+ RegState::Define |
+ getDeadRegState(isDead)),
+ FrameIndex);
+ }
+ } else if (Opc == PPC::FMRD) {
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFD))
+ .addReg(InReg, getKillRegState(isKill)),
+ FrameIndex);
+ } else { // move -> load
+ unsigned OutReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFD))
+ .addReg(OutReg,
+ RegState::Define |
+ getDeadRegState(isDead)),
+ FrameIndex);
+ }
+ } else if (Opc == PPC::FMRS) {
+ if (OpNum == 0) { // move -> store
+ unsigned InReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFS))
+ .addReg(InReg, getKillRegState(isKill)),
+ FrameIndex);
+ } else { // move -> load
+ unsigned OutReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFS))
+ .addReg(OutReg,
+ RegState::Define |
+ getDeadRegState(isDead)),
+ FrameIndex);
+ }
+ }
+
+ return NewMI;
+}
+
+bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ if (Ops.size() != 1) return false;
+
+ // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because
+ // it takes more than one instruction to store it.
+ unsigned Opc = MI->getOpcode();
+
+ if ((Opc == PPC::OR &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
+ return true;
+ else if ((Opc == PPC::OR8 &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
+ return true;
+ else if (Opc == PPC::FMRD || Opc == PPC::FMRS)
+ return true;
+
+ return false;
+}
+
+
+bool PPCInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case PPC::BLR: // Return.
+ case PPC::B: // Uncond branch.
+ case PPC::BCTR: // Indirect branch.
+ return true;
+ default: return false;
+ }
+}
+
+bool PPCInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
+ // Leave the CR# the same, but invert the condition.
+ Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
+ return false;
+}
+
+/// GetInstSizeInBytes - Return the number of bytes of code the specified
+/// instruction may occupy. This returns the maximum number of bytes.
+///
+unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case PPC::INLINEASM: { // Inline Asm: Variable size.
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return MF->getTarget().getTargetAsmInfo()->getInlineAsmLength(AsmStr);
+ }
+ case PPC::DBG_LABEL:
+ case PPC::EH_LABEL:
+ case PPC::GC_LABEL:
+ return 0;
+ default:
+ return 4; // PowerPC instructions are all 4 bytes
+ }
+}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
new file mode 100644
index 000000000000..492634c979eb
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -0,0 +1,168 @@
+//===- PPCInstrInfo.h - PowerPC Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_INSTRUCTIONINFO_H
+#define POWERPC32_INSTRUCTIONINFO_H
+
+#include "PPC.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "PPCRegisterInfo.h"
+
+namespace llvm {
+
+/// PPCII - This namespace holds all of the PowerPC target-specific
+/// per-instruction flags. These must match the corresponding definitions in
+/// PPC.td and PPCInstrFormats.td.
+namespace PPCII {
+enum {
+ // PPC970 Instruction Flags. These flags describe the characteristics of the
+ // PowerPC 970 (aka G5) dispatch groups and how they are formed out of
+ // raw machine instructions.
+
+ /// PPC970_First - This instruction starts a new dispatch group, so it will
+ /// always be the first one in the group.
+ PPC970_First = 0x1,
+
+ /// PPC970_Single - This instruction starts a new dispatch group and
+ /// terminates it, so it will be the sole instruction in the group.
+ PPC970_Single = 0x2,
+
+ /// PPC970_Cracked - This instruction is cracked into two pieces, requiring
+ /// two dispatch pipes to be available to issue.
+ PPC970_Cracked = 0x4,
+
+ /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that
+ /// an instruction is issued to.
+ PPC970_Shift = 3,
+ PPC970_Mask = 0x07 << PPC970_Shift
+};
+enum PPC970_Unit {
+ /// These are the various PPC970 execution unit pipelines. Each instruction
+ /// is one of these.
+ PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction
+ PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit
+ PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit
+ PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit
+ PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit
+ PPC970_VALU = 5 << PPC970_Shift, // Vector ALU
+ PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
+ PPC970_BRU = 7 << PPC970_Shift // Branch Unit
+};
+}
+
+
+class PPCInstrInfo : public TargetInstrInfoImpl {
+ PPCTargetMachine &TM;
+ const PPCRegisterInfo RI;
+
+ bool StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+public:
+ explicit PPCInstrInfo(PPCTargetMachine &TM);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ // commuteInstruction - We can commute rlwimi instructions, but only if the
+ // rotate amt is zero. We also have to munge the immediates a bit.
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ /// foldMemoryOperand - PowerPC (like most RISCs) can only fold spills into
+ /// copy instructions, turning them into load/store instructions.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ virtual bool canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const;
+
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// GetInstSizeInBytes - Return the number of bytes of code the specified
+ /// instruction may occupy. This returns the maximum number of bytes.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
new file mode 100644
index 000000000000..772e25ad232f
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -0,0 +1,1475 @@
+//===- PPCInstrInfo.td - The PowerPC Instruction Set -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the subset of the 32-bit PowerPC instruction set, as used
+// by the PowerPC instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+include "PPCInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific type constraints.
+//
+def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+def SDT_PPCvperm : SDTypeProfile<1, 3, [
+ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
+]>;
+
+def SDT_PPCvcmp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
+]>;
+
+def SDT_PPCcondbr : SDTypeProfile<0, 3, [
+ SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClbrx : SDTypeProfile<1, 3, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>
+]>;
+def SDT_PPCstbrx : SDTypeProfile<0, 4, [
+ SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>
+]>;
+
+def SDT_PPClarx : SDTypeProfile<1, 1, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCstcx : SDTypeProfile<0, 2, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
+ SDTCisPtrTy<0>, SDTCisVT<1, i32>
+]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific DAG Nodes.
+//
+
+def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>;
+def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
+def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// This sequence is used for long double->int conversions. It changes bits
+// in the FPSCR, a side effect that is not otherwise modelled.
+def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
+ [SDNPOutFlag]>;
+def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPInFlag, SDNPOutFlag]>;
+def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPInFlag, SDNPOutFlag]>;
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
+ [SDNPInFlag, SDNPOutFlag]>;
+def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3,
+ [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
+ SDTCisVT<3, f64>]>,
+ [SDNPInFlag]>;
+
+def PPCfsel : SDNode<"PPCISD::FSEL",
+ // Type constraint for fsel.
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
+
+def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
+def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
+def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
+def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+
+def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+
+// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
+// amounts. These nodes are generated by the multi-precision shift code.
+def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
+def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
+def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+
+def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>;
+def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def PPCcall_Macho : SDNode<"PPCISD::CALL_Macho", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCcall_ELF : SDNode<"PPCISD::CALL_ELF", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCbctrl_Macho : SDNode<"PPCISD::BCTRL_Macho", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def PPCbctrl_ELF : SDNode<"PPCISD::BCTRL_ELF", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def PPCtailcall : SDNode<"PPCISD::TAILCALL", SDT_PPCCall,
+ [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+
+def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
+def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>;
+
+def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support atomic operations
+def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support dynamic alloca.
+def SDTDynOp : SDTypeProfile<1, 2, []>;
+def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific transformation functions and pattern fragments.
+//
+
+def SHL32 : SDNodeXForm<imm, [{
+ // Transformation function: 31 - imm
+ return getI32Imm(31 - N->getZExtValue());
+}]>;
+
+def SRL32 : SDNodeXForm<imm, [{
+ // Transformation function: 32 - imm
+ return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue()) : getI32Imm(0);
+}]>;
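+// These feed the rlwinm-based patterns for constant 32-bit shifts: shl by n is
+// a rotate-left by n with mask 0..31-n, and srl by n is a rotate-left by 32-n
+// with mask n..31.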
+
+def LO16 : SDNodeXForm<imm, [{
+ // Transformation function: get the low 16 bits.
+ return getI32Imm((unsigned short)N->getZExtValue());
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+def HA16 : SDNodeXForm<imm, [{
+ // Transformation function: return the adjusted high 16 bits, i.e. the high
+ // half plus one when bit 15 of the low half is set (the @ha value).
+ signed int Val = N->getZExtValue();
+ return getI32Imm((Val - (signed short)Val) >> 16);
+}]>;
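+// For example, HA16(0x12348000) is 0x1235 and LO16(0x12348000) is 0x8000;
+// loading 0x1235 into the high half and then adding the sign-extended low
+// half (-0x8000) reconstructs the original constant.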
+def MB : SDNodeXForm<imm, [{
+ // Transformation function: get the start bit of a mask
+ unsigned mb = 0, me;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(mb);
+}]>;
+
+def ME : SDNodeXForm<imm, [{
+ // Transformation function: get the end bit of a mask
+ unsigned mb, me = 0;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(me);
+}]>;
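+// For example, 0x00FFFF00 is a run of ones with MB = 8 and ME = 23 (PPC bit 0
+// is the most significant bit).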
+def maskimm32 : PatLeaf<(imm), [{
+ // maskImm predicate - True if immediate is a run of ones.
+ unsigned mb, me;
+ if (N->getValueType(0) == MVT::i32)
+ return isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ else
+ return false;
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field. Used by instructions like 'addi'.
+ if (N->getValueType(0) == MVT::i32)
+ return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
+ else
+ return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
+}]>;
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field. Used by instructions like 'ori'.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// imm16Shifted* - These match immediates where the low 16 bits are zero. There
+// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
+// identical in 32-bit mode, but in 64-bit mode, they return true if the
+// immediate fits into a sign/zero extended 32-bit immediate (with the low bits
+// clear).
+def imm16ShiftedZExt : PatLeaf<(imm), [{
+ // imm16ShiftedZExt predicate - True if only bits in the top 16 bits of the
+ // immediate are set. Used by instructions like 'xoris'.
+ return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0;
+}], HI16>;
+
+def imm16ShiftedSExt : PatLeaf<(imm), [{
+ // imm16ShiftedSExt predicate - True if only bits in the top 16 bits of the
+ // immediate are set. Used by instructions like 'addis'. Identical to
+ // imm16ShiftedZExt in 32-bit mode.
+ if (N->getZExtValue() & 0xFFFF) return false;
+ if (N->getValueType(0) == MVT::i32)
+ return true;
+ // For 64-bit, make sure it is sext right.
+ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
+}], HI16>;
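+// For example, 0x00010000 satisfies both predicates, while on a 64-bit target
+// 0x80000000 is imm16ShiftedZExt only (its sign-extended 32-bit value differs
+// from the original immediate).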
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Flag Definitions.
+
+class isPPC64 { bit PPC64 = 1; }
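+// isDOT - mixin for the record ("dot") forms such as "or." and "stwcx.", which
+// set the RC bit in the encoding and implicitly define CR0.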
+class isDOT {
+ list<Register> Defs = [CR0];
+ bit RC = 1;
+}
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Operand Definitions.
+
+def s5imm : Operand<i32> {
+ let PrintMethod = "printS5ImmOperand";
+}
+def u5imm : Operand<i32> {
+ let PrintMethod = "printU5ImmOperand";
+}
+def u6imm : Operand<i32> {
+ let PrintMethod = "printU6ImmOperand";
+}
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing.
+ let PrintMethod = "printS16X4ImmOperand";
+}
+def target : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+}
+def calltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+}
+def aaddr : Operand<iPTR> {
+ let PrintMethod = "printAbsAddrOperand";
+}
+def piclabel: Operand<iPTR> {
+ let PrintMethod = "printPICLabel";
+}
+def symbolHi: Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo: Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+def crbitm: Operand<i8> {
+ let PrintMethod = "printcrbitm";
+}
+// Address operands
+def memri : Operand<iPTR> {
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+}
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc, ptr_rc);
+}
+def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
+ let PrintMethod = "printMemRegImmShifted";
+ let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
+}
+
+// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
+// that doesn't matter.
+def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
+ (ops (i32 20), (i32 zero_reg))> {
+ let PrintMethod = "printPredicateOperand";
+}
+
+// Define PowerPC specific addressing mode.
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
+def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
+def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+
+/// This is just the offset part of iaddr, used for preinc.
+def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Predicate Definitions.
+def FPContractions : Predicate<"!NoExcessFPPrecision">;
+def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
+def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Definitions.
+
+// Pseudo-instructions:
+
+let hasCtrlDep = 1 in {
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt),
+ "${:comment} ADJCALLSTACKDOWN",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+ "${:comment} ADJCALLSTACKUP",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
+ "UPDATE_VRSAVE $rD, $rS", []>;
+}
+
+let Defs = [R1], Uses = [R1] in
+def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi),
+ "${:comment} DYNALLOC $result, $negsize, $fpsi",
+ [(set GPRC:$result,
+ (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
+// scheduler into a branch sequence.
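+// Roughly: the inserter emits a conditional branch on $cond and a small
+// triangle of blocks, with a PHI in the join block choosing between $T and $F.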
+let usesCustomDAGSchedInserter = 1, // Expanded by the scheduler.
+ PPC970_Single = 1 in {
+ def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+ def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+ def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+ def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+ def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
+ i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!",
+ []>;
+}
+
+// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
+// scavenge a register for it.
+def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F),
+ "${:comment} SPILL_CR $cond $F", []>;
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isReturn = 1, Uses = [LR, RM] in
+ def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p),
+ "b${p:cc}lr ${p:reg}", BrB,
+ [(retflag)]>;
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
+ def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
+}
+
+let Defs = [LR] in
+ def MovePCtoLR : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>,
+ PPC970_Unit_BRU;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let isBarrier = 1 in {
+ def B : IForm<18, 0, 0, (outs), (ins target:$dst),
+ "b $dst", BrB,
+ [(br bb:$dst)]>;
+ }
+
+ // BCC represents an arbitrary conditional branch on a predicate.
+ // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, target:$dst),
+ "b${cond:cc} ${cond:reg}, $dst"
+ /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+}
+
+// Macho ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the non-callee saved registers...
+ Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR,CTR,
+ CR0,CR1,CR5,CR6,CR7,
+ CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ,
+ CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL_Macho : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA_Macho : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB, [(PPCcall_Macho (i32 imm:$func))]>;
+ }
+ let Uses = [CTR, RM] in {
+ def BCTRL_Macho : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_Macho)]>, Requires<[In32BitMode]>;
+ }
+}
+
+// ELF ABI Calls.
+let isCall = 1, PPC970_Unit = 7,
+ // All calls clobber the non-callee saved registers...
+ Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+ F0,F1,F2,F3,F4,F5,F6,F7,F8,
+ V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
+ LR,CTR,
+ CR0,CR1,CR5,CR6,CR7,
+ CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ,
+ CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL_ELF : IForm<18, 0, 1,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func", BrB, []>; // See Pat patterns below.
+ def BLA_ELF : IForm<18, 1, 1,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func", BrB,
+ [(PPCcall_ELF (i32 imm:$func))]>;
+ }
+ let Uses = [CTR, RM] in {
+ def BCTRL_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
+ (outs), (ins variable_ops),
+ "bctrl", BrB,
+ [(PPCbctrl_ELF)]>, Requires<[In32BitMode]>;
+ }
+}
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNd $dst $offset",
+ []>;
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+ "#TC_RETURNa $func $offset",
+ [(PPCtc_return (i32 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURNr $dst $offset",
+ []>;
+
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
+def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
+ Requires<[In32BitMode]>;
+
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", BrB,
+ []>;
+
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+ "ba $dst", BrB,
+ []>;
+
+
+// DCB* instructions.
+def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
+ "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst),
+ "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst),
+ "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst),
+ "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst),
+ "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst),
+ "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst),
+ "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
+ "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+
+// Atomic operations
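+// These pseudos are expanded by the custom inserter into load-reserve /
+// store-conditional (lwarx/stwcx.) retry loops; see the LWARX/STWCX
+// definitions below.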
+let usesCustomDAGSchedInserter = 1 in {
+ let Uses = [CR0] in {
+ def ATOMIC_LOAD_ADD_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr),
+ "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
+ "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
+ "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new),
+ "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!",
+ [(set GPRC:$dst,
+ (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>;
+
+ def ATOMIC_SWAP_I8 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
+ "${:comment} ATOMIC_SWAP_I8 PSEUDO!",
+ [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>;
+ def ATOMIC_SWAP_I16 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
+ "${:comment} ATOMIC_SWAP_I16 PSEUDO!",
+ [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>;
+ def ATOMIC_SWAP_I32 : Pseudo<
+ (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new),
+ "${:comment} ATOMIC_SWAP_I32 PSEUDO!",
+ [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>;
+ }
+}
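+
+// The custom inserter (see usesCustomDAGSchedInserter above) expands each of
+// these pseudos into a load-reserved / store-conditional retry loop built from
+// the LWARX and STWCX. instructions defined below; roughly:
+//
+//   loop:
+//     lwarx  rD, 0, rPtr        ; load word and reserve
+//     add    rT, rD, rIncr      ; apply the operation (add/sub/and/or/...)
+//     stwcx. rT, 0, rPtr        ; store conditionally, sets CR0
+//     bne-   loop               ; reservation lost, retry
+//
+// The I8/I16 variants additionally shift and mask the value so that the
+// read-modify-write is performed on the containing aligned word.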
+
+// Instructions to support atomic operations
+def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src),
+ "lwarx $rD, $src", LdStLWARX,
+ [(set GPRC:$rD, (PPClarx xoaddr:$src))]>;
+
+let Defs = [CR0] in
+def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwcx. $rS, $dst", LdStSTWCX,
+ [(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
+ isDOT;
+
+let isBarrier = 1, hasCtrlDep = 1 in
+def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Load Instructions.
+//
+
+// Unindexed (r+i) Loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
+ "lbz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
+def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
+ "lha $rD, $src", LdStLHA,
+ [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
+ "lhz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
+ "lwz $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (load iaddr:$src))]>;
+
+def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
+ "lfs $rD, $src", LdStLFDU,
+ [(set F4RC:$rD, (load iaddr:$src))]>;
+def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
+ "lfd $rD, $src", LdStLFD,
+ [(set F8RC:$rD, (load iaddr:$src))]>;
+
+
+// Unindexed (r+i) Loads with Update (preinc).
+let mayLoad = 1 in {
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhau $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", LdStGeneral,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lfs $rD, $addr", LdStLFDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+ "lfd $rD, $addr", LdStLFD,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+// Indexed (r+r) Loads.
+//
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
+def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
+ "lhax $rD, $src", LdStLHA,
+ [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (load xaddr:$src))]>;
+
+
+def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
+ "lhbrx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, srcvalue:$sv, i16))]>;
+def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
+ "lwbrx $rD, $src", LdStGeneral,
+ [(set GPRC:$rD, (PPClbrx xoaddr:$src, srcvalue:$sv, i32))]>;
+
+def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
+ "lfsx $frD, $src", LdStLFDU,
+ [(set F4RC:$frD, (load xaddr:$src))]>;
+def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
+ "lfdx $frD, $src", LdStLFDU,
+ [(set F8RC:$frD, (load xaddr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// PPC32 Store Instructions.
+//
+
+// Unindexed (r+i) Stores.
+let PPC970_Unit = 2 in {
+def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
+ "stb $rS, $src", LdStGeneral,
+ [(truncstorei8 GPRC:$rS, iaddr:$src)]>;
+def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
+ "sth $rS, $src", LdStGeneral,
+ [(truncstorei16 GPRC:$rS, iaddr:$src)]>;
+def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
+ "stw $rS, $src", LdStGeneral,
+ [(store GPRC:$rS, iaddr:$src)]>;
+def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
+ "stfs $rS, $dst", LdStUX,
+ [(store F4RC:$rS, iaddr:$dst)]>;
+def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
+ "stfd $rS, $dst", LdStUX,
+ [(store F8RC:$rS, iaddr:$dst)]>;
+}
+
+// Unindexed (r+i) Stores with Update (preinc).
+let PPC970_Unit = 2 in {
+def STBU : DForm_1<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res,
+ (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stfsu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
+ symbolLo:$ptroff, ptr_rc:$ptrreg),
+ "stfdu $rS, $ptroff($ptrreg)", LdStGeneral,
+ [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
+ iaddroff:$ptroff))]>,
+ RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+}
+
+
+// Indexed (r+r) Stores.
+//
+let PPC970_Unit = 2 in {
+def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stbx $rS, $dst", LdStGeneral,
+ [(truncstorei8 GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
+ "sthx $rS, $dst", LdStGeneral,
+ [(truncstorei16 GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwx $rS, $dst", LdStGeneral,
+ [(store GPRC:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+
+let mayStore = 1 in {
+def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB),
+ "stwux $rS, $rA, $rB", LdStGeneral,
+ []>;
+}
+def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
+ "sthbrx $rS, $dst", LdStGeneral,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, srcvalue:$dummy, i16)]>,
+ PPC970_DGroup_Cracked;
+def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
+ "stwbrx $rS, $dst", LdStGeneral,
+ [(PPCstbrx GPRC:$rS, xoaddr:$dst, srcvalue:$dummy, i32)]>,
+ PPC970_DGroup_Cracked;
+
+def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
+ "stfiwx $frS, $dst", LdStUX,
+ [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>;
+
+def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
+ "stfsx $frS, $dst", LdStUX,
+ [(store F4RC:$frS, xaddr:$dst)]>;
+def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
+ "stfdx $frS, $dst", LdStUX,
+ [(store F8RC:$frS, xaddr:$dst)]>;
+}
+
+let isBarrier = 1 in
+def SYNC : XForm_24_sync<31, 598, (outs), (ins),
+ "sync", LdStSync,
+ [(int_ppc_sync)]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Arithmetic Instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addi $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addic $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>,
+ PPC970_DGroup_Cracked;
+def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "addic. $rD, $rA, $imm", IntGeneral,
+ []>;
+def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
+ "addis $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
+def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
+ "la $rD, $sym($rA)", IntGeneral,
+ [(set GPRC:$rD, (add GPRC:$rA,
+ (PPClo tglobaladdr:$sym, 0)))]>;
+def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "mulli $rD, $rA, $imm", IntMulLI,
+ [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>;
+def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ "subfic $rD, $rA, $imm", IntGeneral,
+ [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>;
+
+let isReMaterializable = 1 in {
+ def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
+ "li $rD, $imm", IntGeneral,
+ [(set GPRC:$rD, immSExt16:$imm)]>;
+ def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
+ "lis $rD, $imm", IntGeneral,
+ [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>;
+}
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>,
+ isDOT;
+def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>;
+def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>;
+def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IntGeneral,
+ [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>;
+def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntGeneral,
+ []>;
+def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
+ "cmpwi $crD, $rA, $imm", IntCompare>;
+def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+ "cmplwi $dst, $src1, $src2", IntCompare>;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "nand $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>;
+def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "and $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>;
+def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "andc $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>;
+def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "or $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>;
+def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "nor $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>;
+def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "orc $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>;
+def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "eqv $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>;
+def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "xor $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>;
+def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "slw $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>;
+def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "srw $rA, $rS, $rB", IntGeneral,
+ [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>;
+def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
+ "sraw $rA, $rS, $rB", IntShift,
+ [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>;
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH),
+ "srawi $rA, $rS, $SH", IntShift,
+ [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>;
+def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
+ "cntlzw $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (ctlz GPRC:$rS))]>;
+def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsb $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>;
+def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
+ "extsh $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>;
+
+def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+ "cmpw $crD, $rA, $rB", IntCompare>;
+def CMPLW : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
+ "cmplw $crD, $rA, $rB", IntCompare>;
+}
+let PPC970_Unit = 3 in { // FPU Operations.
+//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
+// "fcmpo $crD, $fA, $fB", FPCompare>;
+def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+
+let Uses = [RM] in {
+ def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fctiwz $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>;
+ def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
+ "frsp $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fround F8RC:$frB))]>;
+ def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fsqrt $frD, $frB", FPSqrt,
+ [(set F8RC:$frD, (fsqrt F8RC:$frB))]>;
+ def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fsqrts $frD, $frB", FPSqrt,
+ [(set F4RC:$frD, (fsqrt F4RC:$frB))]>;
+ }
+}
+
+/// FMR is split into 3 versions: one for 4-byte FP, one for 8-byte FP, and
+/// one that extends a 4-byte value to 8 bytes.
+///
+/// Note that these are defined as pseudo-ops on the PPC970 because they are
+/// often coalesced away and we don't want the dispatch group builder to think
+/// that they will fill slots (which could cause the load of an LSU reject to
+/// sneak into a d-group with a store).
+def FMRS : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ []>, // (set F4RC:$frD, F4RC:$frB)
+ PPC970_Unit_Pseudo;
+def FMRD : XForm_26<63, 72, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ []>, // (set F8RC:$frD, F8RC:$frB)
+ PPC970_Unit_Pseudo;
+def FMRSD : XForm_26<63, 72, (outs F8RC:$frD), (ins F4RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fextend F4RC:$frB))]>,
+ PPC970_Unit_Pseudo;
+
+let PPC970_Unit = 3 in { // FPU Operations.
+// These are artificially split into two different forms, for 4/8 byte FP.
+def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fabs F4RC:$frB))]>;
+def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fabs $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fabs F8RC:$frB))]>;
+def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>;
+def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fnabs $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>;
+def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set F4RC:$frD, (fneg F4RC:$frB))]>;
+def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fneg $frD, $frB", FPGeneral,
+ [(set F8RC:$frD, (fneg F8RC:$frB))]>;
+}
+
+
+// XL-Form instructions. Condition register logical ops.
+//
+def MCRF : XLForm_3<19, 0, (outs CRRC:$BF), (ins CRRC:$BFA),
+ "mcrf $BF, $BFA", BrMCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def CREQV : XLForm_1<19, 289, (outs CRBITRC:$CRD),
+ (ins CRBITRC:$CRA, CRBITRC:$CRB),
+ "creqv $CRD, $CRA, $CRB", BrCR,
+ []>;
+
+def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD),
+ (ins CRBITRC:$CRA, CRBITRC:$CRB),
+ "cror $CRD, $CRA, $CRB", BrCR,
+ []>;
+
+def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
+ "creqv $dst, $dst, $dst", BrCR,
+ []>;
+
+// XFX-Form instructions. Instructions that deal with SPRs.
+//
+let Uses = [CTR] in {
+def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
+ "mfctr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in {
+def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [LR] in {
+def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins GPRC:$rS),
+ "mtlr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR] in {
+def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins),
+ "mflr $rT", SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// Move to/from VRSAVE: despite being an SPR, the VRSAVE register is renamed like
+// a GPR on the PPC970. As such, copies in and out have the same performance
+// characteristics as an OR instruction.
+def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+
+def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+// FIXME: this Uses all the CR registers. Marking it as such is
+// necessary for DeadMachineInstructionElim to do the right thing.
+// However, marking it also exposes PR 2964, and causes crashes in
+// the Local RA because it doesn't like this sequence:
+// vreg = MCRF CR0
+// MFCR <kill of whatever preg got assigned to vreg>
+// For now DeadMachineInstructionElim is turned off, so don't do the marking.
+def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+ "mfcr $rT, $FXM", SprMFCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+// Instructions to manipulate FPSCR. Only long double handling uses these.
+// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+
+let Uses = [RM], Defs = [RM] in {
+ def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
+ "mtfsb0 $FM", IntMTFSB0,
+ [(PPCmtfsb0 (i32 imm:$FM))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
+ "mtfsb1 $FM", IntMTFSB0,
+ [(PPCmtfsb1 (i32 imm:$FM))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ // MTFSF does not actually produce an FP result. We pretend it copies
+ // input reg B to the output. If we didn't do this it would look like the
+ // instruction had no outputs (because we aren't modelling the FPSCR) and
+ // it would be deleted.
+ def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA),
+ (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
+ "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
+ [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM),
+ F8RC:$rT, F8RC:$FRB))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+let Uses = [RM] in {
+ def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins),
+ "mffs $rT", IntMFFS,
+ [(set F8RC:$rT, (PPCmffs))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def FADDrtz: AForm_2<63, 21,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fadd $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+
+
+let PPC970_Unit = 1 in { // FXU Operations.
+
+// XO-Form instructions. Arithmetic instructions that can set the overflow bit.
+//
+def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "add $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>;
+def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "addc $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_Cracked;
+def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "adde $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
+def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "divw $rT, $rA, $rB", IntDivW,
+ [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "divwu $rT, $rA, $rB", IntDivW,
+ [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mulhw $rT, $rA, $rB", IntMulHW,
+ [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>;
+def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mulhwu $rT, $rA, $rB", IntMulHWU,
+ [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>;
+def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "mullw $rT, $rA, $rB", IntMulHW,
+ [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>;
+def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subf $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>;
+def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subfc $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>,
+ PPC970_DGroup_Cracked;
+def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
+ "subfe $rT, $rA, $rB", IntGeneral,
+ [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
+def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "addme $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, immAllOnes))]>;
+def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "addze $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
+def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "neg $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (ineg GPRC:$rA))]>;
+def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "subfme $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (sube immAllOnes, GPRC:$rA))]>;
+def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
+ "subfze $rT, $rA", IntGeneral,
+ [(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
+}
+
+// A-Form instructions. Most of the instructions executed in the FPU are of
+// this type.
+//
+let PPC970_Unit = 3 in { // FPU Operations.
+let Uses = [RM] in {
+ def FMADD : AForm_1<63, 29,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMADDS : AForm_1<59, 29,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMSUB : AForm_1<63, 28,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FMSUBS : AForm_1<59, 28,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB))]>,
+ Requires<[FPContractions]>;
+ def FNMADD : AForm_1<63, 31,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMADDS : AForm_1<59, 31,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMSUB : AForm_1<63, 30,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+ def FNMSUBS : AForm_1<59, 30,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
+}
+// FSEL is artificially split into 4-byte and 8-byte forms for the result. To
+// avoid needing four variants, the comparison is forced to always be an 8-byte
+// double (code should use an FMRSD if the input comparison value really wants
+// to be a float), leaving only the 4/8-byte forms for the result and selected
+// operands.
+def FSELD : AForm_1<63, 23,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>;
+def FSELS : AForm_1<63, 23,
+ (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+ "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>;
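+// (fsel computes FRT = (FRA >= 0.0) ? FRC : FRB, so a branch-free select such
+// as "x >= 0.0 ? y : z" can be emitted as a single fsel.)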
+let Uses = [RM] in {
+ def FADD : AForm_2<63, 21,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fadd $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>;
+ def FADDS : AForm_2<59, 21,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fadds $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+ def FDIV : AForm_2<63, 18,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fdiv $FRT, $FRA, $FRB", FPDivD,
+ [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>;
+ def FDIVS : AForm_2<59, 18,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fdivs $FRT, $FRA, $FRB", FPDivS,
+ [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>;
+ def FMUL : AForm_3<63, 25,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fmul $FRT, $FRA, $FRB", FPFused,
+ [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRB))]>;
+ def FMULS : AForm_3<59, 25,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fmuls $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>;
+ def FSUB : AForm_2<63, 20,
+ (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
+ "fsub $FRT, $FRA, $FRB", FPGeneral,
+ [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>;
+ def FSUBS : AForm_2<59, 20,
+ (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
+ "fsubs $FRT, $FRA, $FRB", FPGeneral,
+ [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>;
+ }
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+// M-Form instructions. Rotate and mask instructions.
+//
+let isCommutable = 1 in {
+// RLWIMI can be commuted if the rotate amount is zero.
+def RLWIMI : MForm_2<20,
+ (outs GPRC:$rA), (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate,
+ []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+def RLWINM : MForm_2<21,
+ (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+def RLWINMo : MForm_2<21,
+ (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>, isDOT, PPC970_DGroup_Cracked;
+def RLWNM : MForm_2<23,
+ (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME),
+ "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral,
+ []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DWARF Pseudo Instructions
+//
+
+def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file),
+ "${:comment} .loc $file, $line, $col",
+ [(dwarf_loc (i32 imm:$line), (i32 imm:$col),
+ (i32 imm:$file))]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Patterns
+//
+
+// Arbitrary immediate support. Implement in terms of LIS/ORI.
+def : Pat<(i32 imm:$imm),
+ (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
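+// For example, the 32-bit constant 0x12345678 becomes:
+//   lis r3, 0x1234        ; r3 = 0x12340000
+//   ori r3, r3, 0x5678    ; r3 = 0x12345678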
+
+// Implement the 'not' operation with the NOR instruction.
+def NOT : Pat<(not GPRC:$in),
+ (NOR GPRC:$in, GPRC:$in)>;
+
+// ADD an arbitrary immediate.
+def : Pat<(add GPRC:$in, imm:$imm),
+ (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
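+// (ADDI sign-extends its 16-bit immediate, so the high half above uses HA16,
+// the "high adjusted" value: HI16 plus one whenever bit 15 of the immediate is
+// set, compensating for the borrow introduced by the sign-extended low half.)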
+// OR an arbitrary immediate.
+def : Pat<(or GPRC:$in, imm:$imm),
+ (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// XOR an arbitrary immediate.
+def : Pat<(xor GPRC:$in, imm:$imm),
+ (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// SUBFIC
+def : Pat<(sub immSExt16:$imm, GPRC:$in),
+ (SUBFIC GPRC:$in, imm:$imm)>;
+
+// SHL/SRL
+def : Pat<(shl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>;
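+// For example, "shl by 4" becomes "rlwinm rA, rS, 4, 0, 27" and "srl by 4"
+// becomes "rlwinm rA, rS, 28, 4, 31"; the rotate amount together with the mask
+// bounds discards exactly the bits shifted out.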
+
+// ROTL
+def : Pat<(rotl GPRC:$in, GPRC:$sh),
+ (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>;
+def : Pat<(rotl GPRC:$in, (i32 imm:$imm)),
+ (RLWINM GPRC:$in, imm:$imm, 0, 31)>;
+
+// RLWNM
+def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
+ (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
+
+// Calls
+def : Pat<(PPCcall_Macho (i32 tglobaladdr:$dst)),
+ (BL_Macho tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Macho (i32 texternalsym:$dst)),
+ (BL_Macho texternalsym:$dst)>;
+def : Pat<(PPCcall_ELF (i32 tglobaladdr:$dst)),
+ (BL_ELF tglobaladdr:$dst)>;
+def : Pat<(PPCcall_ELF (i32 texternalsym:$dst)),
+ (BL_ELF texternalsym:$dst)>;
+
+
+def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
+ (TCRETURNri CTRRC:$dst, imm:$imm)>;
+
+
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
+def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS GPRC:$in, tglobaladdr:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS GPRC:$in, tconstpool:$g)>;
+def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS GPRC:$in, tjumptable:$g)>;
+
+// Fused negative multiply subtract, alternate pattern
+def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
+ (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
+ Requires<[FPContractions]>;
+def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
+ (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
+ Requires<[FPContractions]>;
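+// (fnmsub computes -((A * C) - B), which equals B - A * C, so the plain
+// fsub-of-fmul form above maps onto a single fused instruction.)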
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
+// amounts.
+def : Pat<(sra GPRC:$rS, GPRC:$rB),
+ (SRAW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(srl GPRC:$rS, GPRC:$rB),
+ (SRW GPRC:$rS, GPRC:$rB)>;
+def : Pat<(shl GPRC:$rS, GPRC:$rB),
+ (SLW GPRC:$rS, GPRC:$rB)>;
+
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX xaddr:$src)>;
+def : Pat<(extloadf32 iaddr:$src),
+ (FMRSD (LFS iaddr:$src))>;
+def : Pat<(extloadf32 xaddr:$src),
+ (FMRSD (LFSX xaddr:$src))>;
+
+// Memory barriers
+def : Pat<(membarrier (i32 imm:$ll),
+ (i32 imm:$ls),
+ (i32 imm:$sl),
+ (i32 imm:$ss),
+ (i32 imm:$device)),
+ (SYNC)>;
+
+include "PPCInstrAltivec.td"
+include "PPCInstr64Bit.td"
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
new file mode 100644
index 000000000000..035647ec5a30
--- /dev/null
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -0,0 +1,437 @@
+//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the 32-bit PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "PPCJITInfo.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/System/Memory.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+#define BUILD_ADDIS(RD,RS,IMM16) \
+ ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+#define BUILD_ORI(RD,RS,UIMM16) \
+ ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_ORIS(RD,RS,UIMM16) \
+ ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_RLDICR(RD,RS,SH,ME) \
+ ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \
+ (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1))
+#define BUILD_MTSPR(RS,SPR) \
+ ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))
+#define BUILD_BCCTRx(BO,BI,LINK) \
+ ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))
+#define BUILD_B(TARGET, LINK) \
+ ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1))
+
+// Pseudo-ops
+#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16)
+#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6)
+#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9)
+#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK)
+
+static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
+ intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2;
+ unsigned *AtI = (unsigned*)(intptr_t)At;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ AtI[0] = BUILD_B(Offset, isCall); // b/bl target
+ } else if (!is64Bit) {
+ AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_MTCTR(12); // mtctr r12
+ AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl
+ } else {
+ AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32
+ AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address)
+ AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[5] = BUILD_MTCTR(12); // mtctr r12
+ AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl
+ }
+}
+
+extern "C" void PPC32CompilationCallback();
+extern "C" void PPC64CompilationCallback();
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ !(defined(__ppc64__) || defined(__FreeBSD__))
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead, we
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC32CompilationCallback\n"
+"_PPC32CompilationCallback:\n"
+  // Make space for 8 ints r[3-10] and 13 doubles f[1-13].
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // PowerPC64 ABI linkage - 24 bytes
+ // parameters - 32 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 32 bytes
+ "mflr r0\n"
+ "stw r0, 8(r1)\n"
+ "stwu r1, -208(r1)\n"
+ // Save all int arg registers
+ "stw r10, 204(r1)\n" "stw r9, 200(r1)\n"
+ "stw r8, 196(r1)\n" "stw r7, 192(r1)\n"
+ "stw r6, 188(r1)\n" "stw r5, 184(r1)\n"
+ "stw r4, 180(r1)\n" "stw r3, 176(r1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n"
+ "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n"
+ "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n"
+ "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n"
+ "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n"
+ "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n"
+ "stfd f1, 72(r1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr r3, r0\n"
+ "lwz r2, 208(r1)\n" // stub's frame
+ "lwz r4, 8(r2)\n" // stub's lr
+ "li r5, 0\n" // 0 == 32 bit
+ "bl _PPCCompilationCallbackC\n"
+ "mtctr r3\n"
+ // Restore all int arg registers
+ "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
+ "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n"
+ "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n"
+ "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n"
+ // Restore all FP arg registers
+ "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n"
+ "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n"
+ "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n"
+ "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n"
+ "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n"
+ "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n"
+ "lfd f1, 72(r1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz r1, 208(r1)\n"
+ "lwz r2, 8(r1)\n"
+ "mtlr r2\n"
+ "bctr\n"
+ );
+
+#elif defined(__PPC__) && !defined(__ppc64__)
+// Linux & FreeBSD / PPC 32 support
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us. Instead, we
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC32CompilationCallback\n"
+"PPC32CompilationCallback:\n"
+  // Make space for 8 ints r[3-10] and 8 doubles f[1-8].
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // 8 double registers - 64 bytes
+ // 8 int registers - 32 bytes
+ "mflr 0\n"
+ "stw 0, 4(1)\n"
+ "stwu 1, -104(1)\n"
+ // Save all int arg registers
+ "stw 10, 100(1)\n" "stw 9, 96(1)\n"
+ "stw 8, 92(1)\n" "stw 7, 88(1)\n"
+ "stw 6, 84(1)\n" "stw 5, 80(1)\n"
+ "stw 4, 76(1)\n" "stw 3, 72(1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd 8, 64(1)\n"
+ "stfd 7, 56(1)\n" "stfd 6, 48(1)\n"
+ "stfd 5, 40(1)\n" "stfd 4, 32(1)\n"
+ "stfd 3, 24(1)\n" "stfd 2, 16(1)\n"
+ "stfd 1, 8(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr 3, 0\n"
+ "lwz 5, 104(1)\n" // stub's frame
+ "lwz 4, 4(5)\n" // stub's lr
+ "li 5, 0\n" // 0 == 32 bit
+ "bl PPCCompilationCallbackC\n"
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
+ "lwz 8, 92(1)\n" "lwz 7, 88(1)\n"
+ "lwz 6, 84(1)\n" "lwz 5, 80(1)\n"
+ "lwz 4, 76(1)\n" "lwz 3, 72(1)\n"
+ // Restore all FP arg registers
+ "lfd 8, 64(1)\n"
+ "lfd 7, 56(1)\n" "lfd 6, 48(1)\n"
+ "lfd 5, 40(1)\n" "lfd 4, 32(1)\n"
+ "lfd 3, 24(1)\n" "lfd 2, 16(1)\n"
+ "lfd 1, 8(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz 1, 104(1)\n"
+ "lwz 0, 4(1)\n"
+ "mtlr 0\n"
+ "bctr\n"
+ );
+#else
+void PPC32CompilationCallback() {
+ assert(0 && "This is not a power pc, you can't execute this!");
+ abort();
+}
+#endif
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+ defined(__ppc64__)
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC64CompilationCallback\n"
+"_PPC64CompilationCallback:\n"
+  // Make space for 8 ints r[3-10] and 13 doubles f[1-13].
+ // FIXME: need to save v[0-19] for altivec?
+ // Set up a proper stack frame
+ // Layout
+ // PowerPC64 ABI linkage - 48 bytes
+ // parameters - 64 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 64 bytes
+ "mflr r0\n"
+ "std r0, 16(r1)\n"
+ "stdu r1, -280(r1)\n"
+ // Save all int arg registers
+ "std r10, 272(r1)\n" "std r9, 264(r1)\n"
+ "std r8, 256(r1)\n" "std r7, 248(r1)\n"
+ "std r6, 240(r1)\n" "std r5, 232(r1)\n"
+ "std r4, 224(r1)\n" "std r3, 216(r1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd f13, 208(r1)\n" "stfd f12, 200(r1)\n"
+ "stfd f11, 192(r1)\n" "stfd f10, 184(r1)\n"
+ "stfd f9, 176(r1)\n" "stfd f8, 168(r1)\n"
+ "stfd f7, 160(r1)\n" "stfd f6, 152(r1)\n"
+ "stfd f5, 144(r1)\n" "stfd f4, 136(r1)\n"
+ "stfd f3, 128(r1)\n" "stfd f2, 120(r1)\n"
+ "stfd f1, 112(r1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 1.
+ "mr r3, r0\n"
+ "ld r2, 280(r1)\n" // stub's frame
+ "ld r4, 16(r2)\n" // stub's lr
+ "li r5, 1\n" // 1 == 64 bit
+ "bl _PPCCompilationCallbackC\n"
+ "mtctr r3\n"
+ // Restore all int arg registers
+ "ld r10, 272(r1)\n" "ld r9, 264(r1)\n"
+ "ld r8, 256(r1)\n" "ld r7, 248(r1)\n"
+ "ld r6, 240(r1)\n" "ld r5, 232(r1)\n"
+ "ld r4, 224(r1)\n" "ld r3, 216(r1)\n"
+ // Restore all FP arg registers
+ "lfd f13, 208(r1)\n" "lfd f12, 200(r1)\n"
+ "lfd f11, 192(r1)\n" "lfd f10, 184(r1)\n"
+ "lfd f9, 176(r1)\n" "lfd f8, 168(r1)\n"
+ "lfd f7, 160(r1)\n" "lfd f6, 152(r1)\n"
+ "lfd f5, 144(r1)\n" "lfd f4, 136(r1)\n"
+ "lfd f3, 128(r1)\n" "lfd f2, 120(r1)\n"
+ "lfd f1, 112(r1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "ld r1, 280(r1)\n"
+ "ld r2, 16(r1)\n"
+ "mtlr r2\n"
+ "bctr\n"
+ );
+#else
+void PPC64CompilationCallback() {
+ assert(0 && "This is not a power pc, you can't execute this!");
+ abort();
+}
+#endif
+
+extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
+ // Adjust the pointer to the address of the call instruction in the stub
+ // emitted by emitFunctionStub, rather than the instruction after it.
+ unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
+ unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
+
+ void *Target = JITCompilerFunction(StubCallAddr);
+
+ // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
+ // it to branch directly to the destination. If so, rewrite it so it does not
+ // need to go through the stub anymore.
+ unsigned OrigCallInst = *OrigCallAddr;
+ if ((OrigCallInst >> 26) == 18) { // Direct call.
+ intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
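+      // An I-form branch keeps its opcode in the top 6 bits and AA/LK in the
+      // low 2 bits, with the 24-bit word offset in between; the mask below
+      // preserves opcode/AA/LK and the OR writes in the new offset.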
+ // Clear the original target out.
+ OrigCallInst &= (63 << 26) | 3;
+ // Fill in the new target.
+ OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
+ // Replace the call.
+ *OrigCallAddr = OrigCallInst;
+ }
+ }
+
+ // Assert that we are coming from a stub that was created with our
+ // emitFunctionStub.
+ if ((*StubCallAddr >> 26) == 18)
+ StubCallAddr -= 3;
+ else {
+ assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
+ StubCallAddr -= is64Bit ? 9 : 6;
+ }
+
+ // Rewrite the stub with an unconditional branch to the target, for any users
+ // who took the address of the stub.
+ EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
+
+ // Put the address of the target function to call and the address to return to
+ // after calling the target function in a place that is easy to get on the
+ // stack after we restore all regs.
+ return Target;
+}
+
+
+
+TargetJITInfo::LazyResolverFn
+PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
+ JITCompilerFunction = Fn;
+ return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
+}
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+defined(__APPLE__)
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+#endif
+
+void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
+ Fn != (void*)(intptr_t)PPC64CompilationCallback) {
+ JCE.startGVStub(F, 7*4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt(Addr, (intptr_t)Fn, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 7*4);
+ return JCE.finishGVStub(F);
+ }
+
+ JCE.startGVStub(F, 10*4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ if (is64Bit) {
+ JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
+ } else if (TM.getSubtargetImpl()->isMachoABI()){
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
+ } else {
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610024); // stw r11, 36(r1)
+ }
+ intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 10*4);
+ return JCE.finishGVStub(F);
+}
+
+
+void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((PPC::RelocationType)MR->getRelocationType()) {
+ default: assert(0 && "Unknown relocation type!");
+ case PPC::reloc_pcrel_bx:
+ // PC-relative relocation for b and bl instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2;
+ break;
+ case PPC::reloc_pcrel_bcx:
+ // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other
+ // bcx instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2;
+ break;
+ case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr
+ case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr
+ ResultPtr += MR->getConstantVal();
+
+ // If this is a high-part access, get the high-part.
+ if (MR->getRelocationType() == PPC::reloc_absolute_high) {
+ // If the low part will have a carry (really a borrow) from the low
+ // 16-bits into the high 16, add a bit to borrow from.
+ if (((int)ResultPtr << 16) < 0)
+ ResultPtr += 1 << 16;
+ ResultPtr >>= 16;
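+        // e.g. for a target of 0x10018004 the low half 0x8004 is negative when
+        // sign-extended, so the high half is bumped to 0x1002 and
+        // (0x1002 << 16) + (int16_t)0x8004 == 0x10018004.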
+ }
+
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 65535;
+ unsigned HighBits = *RelocPos & ~65535;
+ *RelocPos = LowBits | HighBits; // Slam into low 16-bits
+ break;
+ }
+ case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr
+ ResultPtr += MR->getConstantVal();
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC;
+ unsigned HighBits = *RelocPos & 0xFFFF0003;
+ *RelocPos = LowBits | HighBits; // Slam into low 14-bits.
+ break;
+ }
+ }
+ }
+}
+
+void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
+}
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
new file mode 100644
index 000000000000..2e25b295f432
--- /dev/null
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -0,0 +1,48 @@
+//===- PPCJITInfo.h - PowerPC impl. of the JIT interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_JITINFO_H
+#define POWERPC_JITINFO_H
+
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+
+namespace llvm {
+ class PPCTargetMachine;
+
+ class PPCJITInfo : public TargetJITInfo {
+ protected:
+ PPCTargetMachine &TM;
+ bool is64Bit;
+ public:
+ PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) {
+ useGOT = 0;
+ is64Bit = tmIs64Bit;
+ }
+
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+ };
+}
+
+#endif
diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.cpp b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp
new file mode 100644
index 000000000000..3bfa6d719105
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp
@@ -0,0 +1,151 @@
+//===-- PPCMachOWriterInfo.cpp - Mach-O Writer Info for the PowerPC -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Mach-O writer information for the PowerPC backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMachOWriterInfo.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachORelocation.h"
+#include "llvm/Support/OutputBuffer.h"
+#include <cstdio>
+using namespace llvm;
+
+PPCMachOWriterInfo::PPCMachOWriterInfo(const PPCTargetMachine &TM)
+ : TargetMachOWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64 ?
+ HDR_CPU_TYPE_POWERPC64 :
+ HDR_CPU_TYPE_POWERPC,
+ HDR_CPU_SUBTYPE_POWERPC_ALL) {}
+PPCMachOWriterInfo::~PPCMachOWriterInfo() {}
+
+/// GetTargetRelocation - For the MachineRelocation MR, convert it to one or
+/// more PowerPC MachORelocation(s), add the new relocations to the
+/// MachOSection, and rewrite the instruction at the section offset if required
+/// by that relocation type.
+unsigned PPCMachOWriterInfo::GetTargetRelocation(MachineRelocation &MR,
+ unsigned FromIdx,
+ unsigned ToAddr,
+ unsigned ToIdx,
+ OutputBuffer &RelocOut,
+ OutputBuffer &SecOut,
+ bool Scattered,
+ bool isExtern) const {
+ unsigned NumRelocs = 0;
+ uint64_t Addr = 0;
+
+ // Get the address of whatever it is we're relocating, if possible.
+ if (!isExtern)
+ Addr = (uintptr_t)MR.getResultPointer() + ToAddr;
+
+ switch ((PPC::RelocationType)MR.getRelocationType()) {
+ default: assert(0 && "Unknown PPC relocation type!");
+ case PPC::reloc_absolute_low_ix:
+ assert(0 && "Unhandled PPC relocation type!");
+ break;
+ case PPC::reloc_vanilla:
+ {
+ // FIXME: need to handle 64 bit vanilla relocs
+ MachORelocation VANILLA(MR.getMachineCodeOffset(), ToIdx,
+ false, 2, isExtern,
+ PPC_RELOC_VANILLA,
+ Scattered, (intptr_t)MR.getResultPointer());
+ ++NumRelocs;
+
+ if (Scattered) {
+ RelocOut.outword(VANILLA.getPackedFields());
+ RelocOut.outword(VANILLA.getAddress());
+ } else {
+ RelocOut.outword(VANILLA.getAddress());
+ RelocOut.outword(VANILLA.getPackedFields());
+ }
+
+ intptr_t SymbolOffset;
+
+ if (Scattered)
+ SymbolOffset = Addr + MR.getConstantVal();
+ else
+ SymbolOffset = Addr;
+
+ printf("vanilla fixup: sec_%x[%x] = %x\n", FromIdx,
+ unsigned(MR.getMachineCodeOffset()),
+ unsigned(SymbolOffset));
+ SecOut.fixword(SymbolOffset, MR.getMachineCodeOffset());
+ }
+ break;
+ case PPC::reloc_pcrel_bx:
+ {
+ // FIXME: Presumably someday we will need to branch to other, non-extern
+ // functions too. Need to figure out some way to distinguish between
+ // target is BB and target is function.
+ if (isExtern) {
+ MachORelocation BR24(MR.getMachineCodeOffset(), ToIdx, true, 2,
+ isExtern, PPC_RELOC_BR24, Scattered,
+ (intptr_t)MR.getMachineCodeOffset());
+ RelocOut.outword(BR24.getAddress());
+ RelocOut.outword(BR24.getPackedFields());
+ ++NumRelocs;
+ }
+
+ Addr -= MR.getMachineCodeOffset();
+ Addr >>= 2;
+ Addr &= 0xFFFFFF;
+ Addr <<= 2;
+ Addr |= (SecOut[MR.getMachineCodeOffset()] << 24);
+ Addr |= (SecOut[MR.getMachineCodeOffset()+3] & 0x3);
+ SecOut.fixword(Addr, MR.getMachineCodeOffset());
+ break;
+ }
+ case PPC::reloc_pcrel_bcx:
+ {
+ Addr -= MR.getMachineCodeOffset();
+ Addr &= 0xFFFC;
+
+ SecOut.fixhalf(Addr, MR.getMachineCodeOffset() + 2);
+ break;
+ }
+ case PPC::reloc_absolute_high:
+ {
+ MachORelocation HA16(MR.getMachineCodeOffset(), ToIdx, false, 2,
+ isExtern, PPC_RELOC_HA16);
+ MachORelocation PAIR(Addr & 0xFFFF, 0xFFFFFF, false, 2, isExtern,
+ PPC_RELOC_PAIR);
+ NumRelocs = 2;
+
+ RelocOut.outword(HA16.getRawAddress());
+ RelocOut.outword(HA16.getPackedFields());
+ RelocOut.outword(PAIR.getRawAddress());
+ RelocOut.outword(PAIR.getPackedFields());
+
+ Addr += 0x8000;
+
+ SecOut.fixhalf(Addr >> 16, MR.getMachineCodeOffset() + 2);
+ break;
+ }
+ case PPC::reloc_absolute_low:
+ {
+ MachORelocation LO16(MR.getMachineCodeOffset(), ToIdx, false, 2,
+ isExtern, PPC_RELOC_LO16);
+ MachORelocation PAIR(Addr >> 16, 0xFFFFFF, false, 2, isExtern,
+ PPC_RELOC_PAIR);
+ NumRelocs = 2;
+
+ RelocOut.outword(LO16.getRawAddress());
+ RelocOut.outword(LO16.getPackedFields());
+ RelocOut.outword(PAIR.getRawAddress());
+ RelocOut.outword(PAIR.getPackedFields());
+
+ SecOut.fixhalf(Addr, MR.getMachineCodeOffset() + 2);
+ break;
+ }
+ }
+
+ return NumRelocs;
+}
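
The reloc_absolute_high case above adds 0x8000 to the address before taking the
upper halfword. A standalone sketch (mine, not part of the patch) of why that
bias is needed: the matching low halfword written by reloc_absolute_low is
consumed sign-extended by the paired instruction, so ha16 has to absorb the
borrow whenever the low half is 0x8000 or greater.

    #include <cassert>
    #include <cstdint>

    // ha16/lo16 split used by the reloc_absolute_high / reloc_absolute_low
    // cases: the low halfword is sign-extended when consumed, so the high
    // halfword is pre-biased by 0x8000.
    static uint32_t ha16(uint32_t Addr) { return (Addr + 0x8000) >> 16; }
    static uint32_t lo16(uint32_t Addr) { return Addr & 0xFFFF; }

    int main() {
      static const uint32_t Tests[] = { 0x00001234u, 0x0000FFFFu,
                                        0x12348000u, 0xDEADBEEFu };
      for (unsigned i = 0; i != sizeof(Tests) / sizeof(Tests[0]); ++i) {
        uint32_t Addr = Tests[i];
        // An addis/addi (or lis/lwz) pair reassembles the address as
        // (ha16 << 16) + sign_extend_16(lo16).
        uint32_t Reassembled = (ha16(Addr) << 16) + (int16_t)lo16(Addr);
        assert(Reassembled == Addr);
        (void)Reassembled;
      }
      return 0;
    }
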
diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.h b/lib/Target/PowerPC/PPCMachOWriterInfo.h
new file mode 100644
index 000000000000..d46334df2602
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachOWriterInfo.h
@@ -0,0 +1,55 @@
+//===-- PPCMachOWriterInfo.h - Mach-O Writer Info for PowerPC ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Mach-O writer information for the PowerPC backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_MACHO_WRITER_INFO_H
+#define PPC_MACHO_WRITER_INFO_H
+
+#include "llvm/Target/TargetMachOWriterInfo.h"
+
+namespace llvm {
+
+ // Forward declarations
+ class MachineRelocation;
+ class OutputBuffer;
+ class PPCTargetMachine;
+
+ class PPCMachOWriterInfo : public TargetMachOWriterInfo {
+ public:
+ PPCMachOWriterInfo(const PPCTargetMachine &TM);
+ virtual ~PPCMachOWriterInfo();
+
+ virtual unsigned GetTargetRelocation(MachineRelocation &MR,
+ unsigned FromIdx,
+ unsigned ToAddr,
+ unsigned ToIdx,
+ OutputBuffer &RelocOut,
+ OutputBuffer &SecOut,
+ bool Scattered, bool Extern) const;
+
+ // Constants for the relocation r_type field.
+ // See <mach-o/ppc/reloc.h>
+ enum {
+ PPC_RELOC_VANILLA, // generic relocation
+ PPC_RELOC_PAIR, // the second relocation entry of a pair
+ PPC_RELOC_BR14, // 14 bit branch displacement to word address
+ PPC_RELOC_BR24, // 24 bit branch displacement to word address
+ PPC_RELOC_HI16, // a PAIR follows with the low 16 bits
+ PPC_RELOC_LO16, // a PAIR follows with the high 16 bits
+ PPC_RELOC_HA16, // a PAIR follows, which is sign extended to 32b
+ PPC_RELOC_LO14 // LO16 with low 2 bits implicitly zero
+ };
+ };
+
+} // end llvm namespace
+
+#endif // PPC_MACHO_WRITER_INFO_H
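
A note on why the reloc_vanilla case in PPCMachOWriterInfo.cpp writes the two
32-bit words of each entry in opposite orders depending on Scattered: a plain
relocation_info entry begins with the section offset, while a scattered entry
must carry the r_scattered flag (and the other packed fields) in its first
word, with the target value in the second. A rough layout sketch, paraphrasing
<mach-o/reloc.h> (mine, not part of the patch; bit-field packing shown as on a
big-endian PowerPC host):

    #include <cstdint>

    struct relocation_info {           // non-scattered: plain address word first
      int32_t  r_address;              // offset within the section being fixed up
      uint32_t r_symbolnum : 24,       // symbol index (extern) or section ordinal
               r_pcrel     : 1,
               r_length    : 2,        // 2 == a 4-byte field
               r_extern    : 1,
               r_type      : 4;        // e.g. PPC_RELOC_VANILLA
    };

    struct scattered_relocation_info { // scattered: packed word first
      uint32_t r_scattered : 1,        // always 1, so it must live in word 0
               r_pcrel     : 1,
               r_length    : 2,
               r_type      : 4,
               r_address   : 24;       // offset within the section
      int32_t  r_value;                // address of the item referred to
    };

    int main() {
      // Both flavours occupy two 32-bit words.
      return (sizeof(relocation_info) == 8 &&
              sizeof(scattered_relocation_info) == 8) ? 0 : 1;
    }
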
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
new file mode 100644
index 000000000000..42883d78728e
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -0,0 +1,104 @@
+//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_MACHINE_FUNCTION_INFO_H
+#define PPC_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// PPCFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private PowerPC target-specific information for each
+/// MachineFunction.
+class PPCFunctionInfo : public MachineFunctionInfo {
+private:
+ /// FramePointerSaveIndex - Frame index of where the old frame pointer is
+ /// stored. Also used as an anchor for instructions that need to be altered
+ /// when using frame pointers (dyna_add, dyna_sub.)
+ int FramePointerSaveIndex;
+
+ /// ReturnAddrSaveIndex - Frame index of where the return address is stored.
+ ///
+ int ReturnAddrSaveIndex;
+
+ /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
+ /// function. This is only valid after the initial scan of the function by
+ /// PEI.
+ bool MustSaveLR;
+
+ /// SpillsCR - Indicates whether CR is spilled in the current function.
+ bool SpillsCR;
+
+ /// LRStoreRequired - The bool indicates whether there is some explicit use of
+ /// the LR/LR8 stack slot that is not obvious from scanning the code. This
+ /// requires that the code generator produce a store of LR to the stack on
+ /// entry, even though LR may otherwise apparently not be used.
+ bool LRStoreRequired;
+
+ /// MinReservedArea - This is the frame size that is at least reserved in a
+ /// potential caller (parameter+linkage area).
+ unsigned MinReservedArea;
+
+ /// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum
+ /// amount the stack pointer is adjusted to make the frame bigger for tail
+ /// calls. Used for creating an area before the register spill area.
+ int TailCallSPDelta;
+
+ /// HasFastCall - Does this function contain a fast call? Used to determine
+ /// how the caller's stack pointer should be calculated (epilogue / dynamic
+ /// alloca).
+ bool HasFastCall;
+
+public:
+ PPCFunctionInfo(MachineFunction &MF)
+ : FramePointerSaveIndex(0),
+ ReturnAddrSaveIndex(0),
+ MustSaveLR(false),
+ SpillsCR(false),
+ LRStoreRequired(false),
+ MinReservedArea(0),
+ TailCallSPDelta(0),
+ HasFastCall(false) {}
+
+ int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
+ void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
+
+ int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
+ void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
+
+ unsigned getMinReservedArea() const { return MinReservedArea; }
+ void setMinReservedArea(unsigned size) { MinReservedArea = size; }
+
+ int getTailCallSPDelta() const { return TailCallSPDelta; }
+ void setTailCallSPDelta(int size) { TailCallSPDelta = size; }
+
+ /// MustSaveLR - This is set when the prolog/epilog inserter does its initial
+ /// scan of the function. It is true if the LR/LR8 register is ever explicitly
+ /// defined/clobbered in the machine function (e.g. by calls and movpctolr,
+ /// which is used in PIC generation), or if the LR stack slot is explicitly
+ /// referenced by builtin_return_address.
+ void setMustSaveLR(bool U) { MustSaveLR = U; }
+ bool mustSaveLR() const { return MustSaveLR; }
+
+ void setSpillsCR() { SpillsCR = true; }
+ bool isCRSpilled() const { return SpillsCR; }
+
+ void setLRStoreRequired() { LRStoreRequired = true; }
+ bool isLRStoreRequired() const { return LRStoreRequired; }
+
+ void setHasFastCall() { HasFastCall = true; }
+ bool hasFastCall() const { return HasFastCall; }
+};
+
+} // end of namespace llvm
+
+
+#endif
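
A minimal sketch (mine, not part of the patch; the helper names are
hypothetical) of how backend code typically reaches this object:
MachineFunction::getInfo<T>() lazily constructs one PPCFunctionInfo per
function and returns the same instance on every later call, so flags set
during frame lowering are visible to instruction selection and the asm
printer.

    #include "PPCMachineFunctionInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"

    using namespace llvm;

    // Record where the old frame pointer was spilled for this function.
    static void noteFramePointerSpill(MachineFunction &MF, int FrameIdx) {
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      FI->setFramePointerSaveIndex(FrameIdx);
    }

    // Query the flags accumulated so far to decide whether LR must be
    // stored to its stack slot in the prologue.
    static bool needsLRSave(MachineFunction &MF) {
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      return FI->mustSaveLR() || FI->isLRStoreRequired();
    }
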
diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h
new file mode 100644
index 000000000000..3164e33faae9
--- /dev/null
+++ b/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle without using vperm.
+//
+//===----------------------------------------------------------------------===//
+
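Each entry packs everything the shuffle lowering needs into a single 32-bit
word: the top two bits hold a clamped cost hint (the per-entry comments give
the exact cost), the next four bits select the operation (copy, vmrghw,
vmrglw, vspltisw, vsldoi4/8/12), and the two 13-bit fields are base-9
encodings of the left and right operand masks (digit 8 means undef), which
double as indices back into this table for recursive expansion. A standalone
decoding sketch (mine, not part of the patch), mirroring the decode done by
GeneratePerfectShuffle() in PPCISelLowering.cpp:

    #include <cstdint>
    #include <cstdio>

    struct PerfectShuffleEntry {
      unsigned Cost, OpNum, LHSID, RHSID;
    };

    // [31:30] cost hint  [29:26] operation  [25:13] LHS id  [12:0] RHS id
    static PerfectShuffleEntry decodePerfectShuffle(uint32_t PFEntry) {
      PerfectShuffleEntry E;
      E.Cost  = PFEntry >> 30;
      E.OpNum = (PFEntry >> 26) & 0x0F;
      E.LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
      E.RHSID = PFEntry & ((1 << 13) - 1);
      return E;
    }

    int main() {
      // <0,1,2,3> lives at index 0*729 + 1*81 + 2*9 + 3 = 102 and is encoded
      // as 835584U, i.e. "copy the LHS operand unchanged".
      PerfectShuffleEntry E = decodePerfectShuffle(835584U);
      std::printf("op=%u lhs=%u rhs=%u\n", E.OpNum, E.LHSID, E.RHSID);
      return 0;
    }
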
+// 31 entries have cost 0
+// 292 entries have cost 1
+// 1384 entries have cost 2
+// 3061 entries have cost 3
+// 1733 entries have cost 4
+// 60 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 202162278U, // <0,0,0,0>: Cost 1 vspltisw0 LHS
+ 1140850790U, // <0,0,0,1>: Cost 2 vmrghw <0,0,0,0>, LHS
+ 2617247181U, // <0,0,0,2>: Cost 3 vsldoi4 <0,0,0,0>, <2,0,3,0>
+ 2635163787U, // <0,0,0,3>: Cost 3 vsldoi4 <3,0,0,0>, <3,0,0,0>
+ 1543507254U, // <0,0,0,4>: Cost 2 vsldoi4 <0,0,0,0>, RHS
+ 2281701705U, // <0,0,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,0,5>
+ 2617250133U, // <0,0,0,6>: Cost 3 vsldoi4 <0,0,0,0>, <6,0,7,0>
+ 2659054575U, // <0,0,0,7>: Cost 3 vsldoi4 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,0,u>: Cost 1 vspltisw0 LHS
+ 1141686282U, // <0,0,1,0>: Cost 2 vmrghw LHS, <0,0,1,1>
+ 67944550U, // <0,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 1685241958U, // <0,0,1,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2215870716U, // <0,0,1,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1141727570U, // <0,0,1,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 2215428562U, // <0,0,1,5>: Cost 3 vmrghw LHS, <0,5,6,7>
+ 2215428589U, // <0,0,1,6>: Cost 3 vmrghw LHS, <0,6,0,7>
+ 2659062768U, // <0,0,1,7>: Cost 3 vsldoi4 <7,0,0,1>, <7,0,0,1>
+ 67945117U, // <0,0,1,u>: Cost 1 vmrghw LHS, LHS
+ 2684356045U, // <0,0,2,0>: Cost 3 vsldoi8 <0,0,0,0>, <2,0,3,0>
+ 2216009830U, // <0,0,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2216009901U, // <0,0,2,2>: Cost 3 vmrghw <0,2,1,2>, <0,2,1,2>
+ 2698290853U, // <0,0,2,3>: Cost 3 vsldoi8 <2,3,0,0>, <2,3,0,0>
+ 3289751890U, // <0,0,2,4>: Cost 4 vmrghw <0,2,1,2>, <0,4,1,5>
+ 3758098275U, // <0,0,2,5>: Cost 4 vsldoi8 <0,0,0,0>, <2,5,3,1>
+ 2684356538U, // <0,0,2,6>: Cost 3 vsldoi8 <0,0,0,0>, <2,6,3,7>
+ 3758098410U, // <0,0,2,7>: Cost 4 vsldoi8 <0,0,0,0>, <2,7,0,1>
+ 2216010397U, // <0,0,2,u>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2702272651U, // <0,0,3,0>: Cost 3 vsldoi8 <3,0,0,0>, <3,0,0,0>
+ 2216656998U, // <0,0,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 3844669704U, // <0,0,3,2>: Cost 4 vsldoi12 <3,2,3,0>, <0,3,2,3>
+ 2216657148U, // <0,0,3,3>: Cost 3 vmrghw <0,3,1,0>, <0,3,1,0>
+ 2684357122U, // <0,0,3,4>: Cost 3 vsldoi8 <0,0,0,0>, <3,4,5,6>
+ 3732820066U, // <0,0,3,5>: Cost 4 vsldoi4 <7,0,0,3>, <5,6,7,0>
+ 3778005624U, // <0,0,3,6>: Cost 4 vsldoi8 <3,3,0,0>, <3,6,0,7>
+ 3374713464U, // <0,0,3,7>: Cost 4 vmrglw <3,2,0,3>, <3,6,0,7>
+ 2216657565U, // <0,0,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217361408U, // <0,0,4,0>: Cost 3 vmrghw <0,4,1,5>, <0,0,0,0>
+ 1143619686U, // <0,0,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 3291103405U, // <0,0,4,2>: Cost 4 vmrghw <0,4,1,5>, <0,2,1,2>
+ 3827269988U, // <0,0,4,3>: Cost 4 vsldoi12 <0,3,1,0>, <0,4,3,5>
+ 1143619922U, // <0,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1610616118U, // <0,0,4,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 3758099833U, // <0,0,4,6>: Cost 4 vsldoi8 <0,0,0,0>, <4,6,5,2>
+ 3854107016U, // <0,0,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <0,4,7,5>
+ 1143620253U, // <0,0,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2284396544U, // <0,0,5,0>: Cost 3 vmrglw <0,4,0,5>, <0,0,0,0>
+ 2218025062U, // <0,0,5,1>: Cost 3 vmrghw <0,5,1,5>, LHS
+ 3758100203U, // <0,0,5,2>: Cost 4 vsldoi8 <0,0,0,0>, <5,2,1,3>
+ 3395966100U, // <0,0,5,3>: Cost 4 vmrglw <6,7,0,5>, <7,2,0,3>
+ 3804549052U, // <0,0,5,4>: Cost 4 vsldoi8 <7,7,0,0>, <5,4,6,5>
+ 2302314964U, // <0,0,5,5>: Cost 3 vmrglw <3,4,0,5>, <3,4,0,5>
+ 2785821138U, // <0,0,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 3395966428U, // <0,0,5,7>: Cost 4 vmrglw <6,7,0,5>, <7,6,0,7>
+ 2787148260U, // <0,0,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <0,5,u,7>
+ 2684358997U, // <0,0,6,0>: Cost 3 vsldoi8 <0,0,0,0>, <6,0,7,0>
+ 2218631270U, // <0,0,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2684359162U, // <0,0,6,2>: Cost 3 vsldoi8 <0,0,0,0>, <6,2,7,3>
+ 3758101042U, // <0,0,6,3>: Cost 4 vsldoi8 <0,0,0,0>, <6,3,4,5>
+ 3732843830U, // <0,0,6,4>: Cost 4 vsldoi4 <7,0,0,6>, RHS
+ 3758101227U, // <0,0,6,5>: Cost 4 vsldoi8 <0,0,0,0>, <6,5,7,1>
+ 2684359480U, // <0,0,6,6>: Cost 3 vsldoi8 <0,0,0,0>, <6,6,6,6>
+ 2724836173U, // <0,0,6,7>: Cost 3 vsldoi8 <6,7,0,0>, <6,7,0,0>
+ 2725499806U, // <0,0,6,u>: Cost 3 vsldoi8 <6,u,0,0>, <6,u,0,0>
+ 2726163439U, // <0,0,7,0>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 2219311206U, // <0,0,7,1>: Cost 3 vmrghw <0,7,1,0>, LHS
+ 3868557900U, // <0,0,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <0,7,2,3>
+ 3377400112U, // <0,0,7,3>: Cost 4 vmrglw <3,6,0,7>, <3,2,0,3>
+ 2684360038U, // <0,0,7,4>: Cost 3 vsldoi8 <0,0,0,0>, <7,4,5,6>
+ 3732852834U, // <0,0,7,5>: Cost 4 vsldoi4 <7,0,0,7>, <5,6,7,0>
+ 3871507060U, // <0,0,7,6>: Cost 4 vsldoi12 <7,6,7,0>, <0,7,6,7>
+ 2303658616U, // <0,0,7,7>: Cost 3 vmrglw <3,6,0,7>, <3,6,0,7>
+ 2726163439U, // <0,0,7,u>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,u,0>: Cost 1 vspltisw0 LHS
+ 72589414U, // <0,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 1685242525U, // <0,0,u,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2220073212U, // <0,0,u,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1146331474U, // <0,0,u,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 1610619034U, // <0,0,u,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 2785821138U, // <0,0,u,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 2659120119U, // <0,0,u,7>: Cost 3 vsldoi4 <7,0,0,u>, <7,0,0,u>
+ 72589981U, // <0,0,u,u>: Cost 1 vmrghw LHS, LHS
+ 2698297344U, // <0,1,0,0>: Cost 3 vsldoi8 <2,3,0,1>, <0,0,0,0>
+ 1624555622U, // <0,1,0,1>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 2758984428U, // <0,1,0,2>: Cost 3 vsldoi12 <1,2,3,0>, <1,0,2,1>
+ 2635237524U, // <0,1,0,3>: Cost 3 vsldoi4 <3,0,1,0>, <3,0,1,0>
+ 2693652818U, // <0,1,0,4>: Cost 3 vsldoi8 <1,5,0,1>, <0,4,1,5>
+ 2281701714U, // <0,1,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,1,5>
+ 2698297846U, // <0,1,0,6>: Cost 3 vsldoi8 <2,3,0,1>, <0,6,1,7>
+ 2659128312U, // <0,1,0,7>: Cost 3 vsldoi4 <7,0,1,0>, <7,0,1,0>
+ 1624556189U, // <0,1,0,u>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 1543585802U, // <0,1,1,0>: Cost 2 vsldoi4 <0,0,1,1>, <0,0,1,1>
+ 1141728052U, // <0,1,1,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1141728150U, // <0,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2295644334U, // <0,1,1,3>: Cost 3 vmrglw <2,3,0,1>, <0,2,1,3>
+ 1543589174U, // <0,1,1,4>: Cost 2 vsldoi4 <0,0,1,1>, RHS
+ 2290999634U, // <0,1,1,5>: Cost 3 vmrglw <1,5,0,1>, <0,4,1,5>
+ 2617332135U, // <0,1,1,6>: Cost 3 vsldoi4 <0,0,1,1>, <6,1,7,1>
+ 2617332720U, // <0,1,1,7>: Cost 3 vsldoi4 <0,0,1,1>, <7,0,0,1>
+ 1142171004U, // <0,1,1,u>: Cost 2 vmrghw LHS, <1,u,3,0>
+ 1561509990U, // <0,1,2,0>: Cost 2 vsldoi4 <3,0,1,2>, LHS
+ 2623308516U, // <0,1,2,1>: Cost 3 vsldoi4 <1,0,1,2>, <1,0,1,2>
+ 2698298984U, // <0,1,2,2>: Cost 3 vsldoi8 <2,3,0,1>, <2,2,2,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1561513270U, // <0,1,2,4>: Cost 2 vsldoi4 <3,0,1,2>, RHS
+ 2647199304U, // <0,1,2,5>: Cost 3 vsldoi4 <5,0,1,2>, <5,0,1,2>
+ 2698299322U, // <0,1,2,6>: Cost 3 vsldoi8 <2,3,0,1>, <2,6,3,7>
+ 1585402874U, // <0,1,2,7>: Cost 2 vsldoi4 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2698299540U, // <0,1,3,0>: Cost 3 vsldoi8 <2,3,0,1>, <3,0,1,0>
+ 3290399540U, // <0,1,3,1>: Cost 4 vmrghw <0,3,1,0>, <1,1,1,1>
+ 2698299720U, // <0,1,3,2>: Cost 3 vsldoi8 <2,3,0,1>, <3,2,3,0>
+ 2698299804U, // <0,1,3,3>: Cost 3 vsldoi8 <2,3,0,1>, <3,3,3,3>
+ 2698299906U, // <0,1,3,4>: Cost 3 vsldoi8 <2,3,0,1>, <3,4,5,6>
+ 3832726521U, // <0,1,3,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,3,5,0>
+ 2724842160U, // <0,1,3,6>: Cost 3 vsldoi8 <6,7,0,1>, <3,6,7,0>
+ 2706926275U, // <0,1,3,7>: Cost 3 vsldoi8 <3,7,0,1>, <3,7,0,1>
+ 2698300190U, // <0,1,3,u>: Cost 3 vsldoi8 <2,3,0,1>, <3,u,1,2>
+ 2635268198U, // <0,1,4,0>: Cost 3 vsldoi4 <3,0,1,4>, LHS
+ 2217362228U, // <0,1,4,1>: Cost 3 vmrghw <0,4,1,5>, <1,1,1,1>
+ 2217362326U, // <0,1,4,2>: Cost 3 vmrghw <0,4,1,5>, <1,2,3,0>
+ 2635270296U, // <0,1,4,3>: Cost 3 vsldoi4 <3,0,1,4>, <3,0,1,4>
+ 2635271478U, // <0,1,4,4>: Cost 3 vsldoi4 <3,0,1,4>, RHS
+ 1624558902U, // <0,1,4,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2659160910U, // <0,1,4,6>: Cost 3 vsldoi4 <7,0,1,4>, <6,7,0,1>
+ 2659161084U, // <0,1,4,7>: Cost 3 vsldoi4 <7,0,1,4>, <7,0,1,4>
+ 1624559145U, // <0,1,4,u>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 3832726639U, // <0,1,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,0,1>
+ 2714889871U, // <0,1,5,1>: Cost 3 vsldoi8 <5,1,0,1>, <5,1,0,1>
+ 2302314646U, // <0,1,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 3834717321U, // <0,1,5,3>: Cost 4 vsldoi12 <1,5,3,0>, <1,5,3,0>
+ 3832726679U, // <0,1,5,4>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,4,5>
+ 2717544403U, // <0,1,5,5>: Cost 3 vsldoi8 <5,5,0,1>, <5,5,0,1>
+ 2718208036U, // <0,1,5,6>: Cost 3 vsldoi8 <5,6,0,1>, <5,6,0,1>
+ 3792613493U, // <0,1,5,7>: Cost 4 vsldoi8 <5,7,0,1>, <5,7,0,1>
+ 2719535302U, // <0,1,5,u>: Cost 3 vsldoi8 <5,u,0,1>, <5,u,0,1>
+ 2659172454U, // <0,1,6,0>: Cost 3 vsldoi4 <7,0,1,6>, LHS
+ 3832726735U, // <0,1,6,1>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,1,7>
+ 2724844026U, // <0,1,6,2>: Cost 3 vsldoi8 <6,7,0,1>, <6,2,7,3>
+ 3775361608U, // <0,1,6,3>: Cost 4 vsldoi8 <2,u,0,1>, <6,3,7,0>
+ 2659175734U, // <0,1,6,4>: Cost 3 vsldoi4 <7,0,1,6>, RHS
+ 3832726771U, // <0,1,6,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,5,7>
+ 2724844344U, // <0,1,6,6>: Cost 3 vsldoi8 <6,7,0,1>, <6,6,6,6>
+ 1651102542U, // <0,1,6,7>: Cost 2 vsldoi8 <6,7,0,1>, <6,7,0,1>
+ 1651766175U, // <0,1,6,u>: Cost 2 vsldoi8 <6,u,0,1>, <6,u,0,1>
+ 2724844536U, // <0,1,7,0>: Cost 3 vsldoi8 <6,7,0,1>, <7,0,1,0>
+ 3377397770U, // <0,1,7,1>: Cost 4 vmrglw <3,6,0,7>, <0,0,1,1>
+ 2698302636U, // <0,1,7,2>: Cost 3 vsldoi8 <2,3,0,1>, <7,2,3,0>
+ 2728162531U, // <0,1,7,3>: Cost 3 vsldoi8 <7,3,0,1>, <7,3,0,1>
+ 2724844902U, // <0,1,7,4>: Cost 3 vsldoi8 <6,7,0,1>, <7,4,5,6>
+ 3377398098U, // <0,1,7,5>: Cost 4 vmrglw <3,6,0,7>, <0,4,1,5>
+ 2724845076U, // <0,1,7,6>: Cost 3 vsldoi8 <6,7,0,1>, <7,6,7,0>
+ 2724845164U, // <0,1,7,7>: Cost 3 vsldoi8 <6,7,0,1>, <7,7,7,7>
+ 2724845186U, // <0,1,7,u>: Cost 3 vsldoi8 <6,7,0,1>, <7,u,1,2>
+ 1561559142U, // <0,1,u,0>: Cost 2 vsldoi4 <3,0,1,u>, LHS
+ 1146331956U, // <0,1,u,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1146332054U, // <0,1,u,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1561562422U, // <0,1,u,4>: Cost 2 vsldoi4 <3,0,1,u>, RHS
+ 1624561818U, // <0,1,u,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2220074191U, // <0,1,u,6>: Cost 3 vmrghw LHS, <1,6,1,7>
+ 1585452032U, // <0,1,u,7>: Cost 2 vsldoi4 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2214593997U, // <0,2,0,0>: Cost 3 vmrghw <0,0,0,0>, <2,0,3,0>
+ 2214675999U, // <0,2,0,1>: Cost 3 vmrghw <0,0,1,1>, <2,1,3,1>
+ 2214594152U, // <0,2,0,2>: Cost 3 vmrghw <0,0,0,0>, <2,2,2,2>
+ 1207959654U, // <0,2,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 3709054262U, // <0,2,0,4>: Cost 4 vsldoi4 <3,0,2,0>, RHS
+ 3375350836U, // <0,2,0,5>: Cost 4 vmrglw <3,3,0,0>, <1,4,2,5>
+ 2214594490U, // <0,2,0,6>: Cost 3 vmrghw <0,0,0,0>, <2,6,3,7>
+ 3288336362U, // <0,2,0,7>: Cost 4 vmrghw <0,0,0,0>, <2,7,0,1>
+ 1207959659U, // <0,2,0,u>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 2215871994U, // <0,2,1,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2215470623U, // <0,2,1,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1141728872U, // <0,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1141728934U, // <0,2,1,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2215872323U, // <0,2,1,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2215872405U, // <0,2,1,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1141729210U, // <0,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2215430122U, // <0,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1141729368U, // <0,2,1,u>: Cost 2 vmrghw LHS, <2,u,3,3>
+ 3289736698U, // <0,2,2,0>: Cost 4 vmrghw <0,2,1,0>, <2,0,u,0>
+ 3289744927U, // <0,2,2,1>: Cost 4 vmrghw <0,2,1,1>, <2,1,3,1>
+ 2216011368U, // <0,2,2,2>: Cost 3 vmrghw <0,2,1,2>, <2,2,2,2>
+ 2216019622U, // <0,2,2,3>: Cost 3 vmrghw <0,2,1,3>, <2,3,0,1>
+ 3289769795U, // <0,2,2,4>: Cost 4 vmrghw <0,2,1,4>, <2,4,u,5>
+ 3289778069U, // <0,2,2,5>: Cost 4 vmrghw <0,2,1,5>, <2,5,u,6>
+ 2216044474U, // <0,2,2,6>: Cost 3 vmrghw <0,2,1,6>, <2,6,3,7>
+ 3732960259U, // <0,2,2,7>: Cost 4 vsldoi4 <7,0,2,2>, <7,0,2,2>
+ 2216061016U, // <0,2,2,u>: Cost 3 vmrghw <0,2,1,u>, <2,u,3,3>
+ 2758985382U, // <0,2,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,0,1>
+ 2758985392U, // <0,2,3,1>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,1,2>
+ 3290400360U, // <0,2,3,2>: Cost 4 vmrghw <0,3,1,0>, <2,2,2,2>
+ 2758985408U, // <0,2,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,3,0>
+ 2758985422U, // <0,2,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,4,5>
+ 2785822424U, // <0,2,3,5>: Cost 3 vsldoi12 <5,6,7,0>, <2,3,5,6>
+ 3290400698U, // <0,2,3,6>: Cost 4 vmrghw <0,3,1,0>, <2,6,3,7>
+ 2765915876U, // <0,2,3,7>: Cost 3 vsldoi12 <2,3,7,0>, <2,3,7,0>
+ 2758985453U, // <0,2,3,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,u,0>
+ 3291104762U, // <0,2,4,0>: Cost 4 vmrghw <0,4,1,5>, <2,0,u,0>
+ 2217362979U, // <0,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2217363048U, // <0,2,4,2>: Cost 3 vmrghw <0,4,1,5>, <2,2,2,2>
+ 2217363110U, // <0,2,4,3>: Cost 3 vmrghw <0,4,1,5>, <2,3,0,1>
+ 3291105087U, // <0,2,4,4>: Cost 4 vmrghw <0,4,1,5>, <2,4,u,1>
+ 3291105173U, // <0,2,4,5>: Cost 4 vmrghw <0,4,1,5>, <2,5,u,6>
+ 2217363386U, // <0,2,4,6>: Cost 3 vmrghw <0,4,1,5>, <2,6,3,7>
+ 3788639688U, // <0,2,4,7>: Cost 4 vsldoi8 <5,1,0,2>, <4,7,5,0>
+ 2217363515U, // <0,2,4,u>: Cost 3 vmrghw <0,4,1,5>, <2,u,0,1>
+ 3376054371U, // <0,2,5,0>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,0>
+ 3788639888U, // <0,2,5,1>: Cost 4 vsldoi8 <5,1,0,2>, <5,1,0,2>
+ 3376055912U, // <0,2,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,2,2,2>
+ 2302312550U, // <0,2,5,3>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3376054375U, // <0,2,5,4>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,4>
+ 3374728244U, // <0,2,5,5>: Cost 4 vmrglw <3,2,0,5>, <1,4,2,5>
+ 3805229154U, // <0,2,5,6>: Cost 4 vsldoi8 <7,u,0,2>, <5,6,7,0>
+ 3376055512U, // <0,2,5,7>: Cost 4 vmrglw <3,4,0,5>, <1,6,2,7>
+ 2302312555U, // <0,2,5,u>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3709100134U, // <0,2,6,0>: Cost 4 vsldoi4 <3,0,2,6>, LHS
+ 3709100950U, // <0,2,6,1>: Cost 4 vsldoi4 <3,0,2,6>, <1,2,3,0>
+ 3709102010U, // <0,2,6,2>: Cost 4 vsldoi4 <3,0,2,6>, <2,6,3,7>
+ 2758985658U, // <0,2,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,3,7>
+ 3709103414U, // <0,2,6,4>: Cost 4 vsldoi4 <3,0,2,6>, RHS
+ 3732992098U, // <0,2,6,5>: Cost 4 vsldoi4 <7,0,2,6>, <5,6,7,0>
+ 3292374970U, // <0,2,6,6>: Cost 4 vmrghw <0,6,0,7>, <2,6,3,7>
+ 3798594383U, // <0,2,6,7>: Cost 4 vsldoi8 <6,7,0,2>, <6,7,0,2>
+ 2758985703U, // <0,2,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,u,7>
+ 3788641274U, // <0,2,7,0>: Cost 4 vsldoi8 <5,1,0,2>, <7,0,1,2>
+ 3377398508U, // <0,2,7,1>: Cost 4 vmrglw <3,6,0,7>, <1,0,2,1>
+ 3377398590U, // <0,2,7,2>: Cost 4 vmrglw <3,6,0,7>, <1,1,2,2>
+ 2303656038U, // <0,2,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 3709111606U, // <0,2,7,4>: Cost 4 vsldoi4 <3,0,2,7>, RHS
+ 3377398836U, // <0,2,7,5>: Cost 4 vmrglw <3,6,0,7>, <1,4,2,5>
+ 3803903447U, // <0,2,7,6>: Cost 4 vsldoi8 <7,6,0,2>, <7,6,0,2>
+ 3293054954U, // <0,2,7,7>: Cost 4 vmrghw <0,7,1,0>, <2,7,0,1>
+ 2303656043U, // <0,2,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2220074490U, // <0,2,u,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2220074527U, // <0,2,u,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1146332776U, // <0,2,u,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1146332838U, // <0,2,u,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2220074819U, // <0,2,u,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2220074901U, // <0,2,u,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1146333114U, // <0,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2220074986U, // <0,2,u,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1146333243U, // <0,2,u,u>: Cost 2 vmrghw LHS, <2,u,0,1>
+ 2629410816U, // <0,3,0,0>: Cost 3 vsldoi4 <2,0,3,0>, <0,0,0,0>
+ 2753530006U, // <0,3,0,1>: Cost 3 vsldoi12 <0,3,1,0>, <3,0,1,2>
+ 2629412301U, // <0,3,0,2>: Cost 3 vsldoi4 <2,0,3,0>, <2,0,3,0>
+ 2214594972U, // <0,3,0,3>: Cost 3 vmrghw <0,0,0,0>, <3,3,3,3>
+ 2758985908U, // <0,3,0,4>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,4,5>
+ 3733016674U, // <0,3,0,5>: Cost 4 vsldoi4 <7,0,3,0>, <5,6,7,0>
+ 3777364488U, // <0,3,0,6>: Cost 4 vsldoi8 <3,2,0,3>, <0,6,3,7>
+ 2281703354U, // <0,3,0,7>: Cost 3 vmrglw <0,0,0,0>, <2,6,3,7>
+ 2758985941U, // <0,3,0,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,u,2>
+ 1141729430U, // <0,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2215471334U, // <0,3,1,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2215471425U, // <0,3,1,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1141729692U, // <0,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1141729794U, // <0,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2215430738U, // <0,3,1,5>: Cost 3 vmrghw LHS, <3,5,5,5>
+ 2215430776U, // <0,3,1,6>: Cost 3 vmrghw LHS, <3,6,0,7>
+ 2295646138U, // <0,3,1,7>: Cost 3 vmrglw <2,3,0,1>, <2,6,3,7>
+ 1141730078U, // <0,3,1,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2758986032U, // <0,3,2,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,2,0,3>
+ 3709141910U, // <0,3,2,1>: Cost 4 vsldoi4 <3,0,3,2>, <1,2,3,0>
+ 3289753921U, // <0,3,2,2>: Cost 4 vmrghw <0,2,1,2>, <3,2,2,2>
+ 2770929992U, // <0,3,2,3>: Cost 3 vsldoi12 <3,2,3,0>, <3,2,3,0>
+ 3289754114U, // <0,3,2,4>: Cost 4 vmrghw <0,2,1,2>, <3,4,5,6>
+ 3362095460U, // <0,3,2,5>: Cost 5 vmrglw <1,1,0,2>, <0,4,3,5>
+ 3832727910U, // <0,3,2,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,2,6,3>
+ 3365414842U, // <0,3,2,7>: Cost 4 vmrglw <1,6,0,2>, <2,6,3,7>
+ 2771298677U, // <0,3,2,u>: Cost 3 vsldoi12 <3,2,u,0>, <3,2,u,0>
+ 2216659094U, // <0,3,3,0>: Cost 3 vmrghw <0,3,1,0>, <3,0,1,2>
+ 3290409190U, // <0,3,3,1>: Cost 4 vmrghw <0,3,1,1>, <3,1,1,1>
+ 2703624496U, // <0,3,3,2>: Cost 3 vsldoi8 <3,2,0,3>, <3,2,0,3>
+ 2216683932U, // <0,3,3,3>: Cost 3 vmrghw <0,3,1,3>, <3,3,3,3>
+ 2216692226U, // <0,3,3,4>: Cost 3 vmrghw <0,3,1,4>, <3,4,5,6>
+ 3733041250U, // <0,3,3,5>: Cost 4 vsldoi4 <7,0,3,3>, <5,6,7,0>
+ 3832727988U, // <0,3,3,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,3,6,0>
+ 3374712762U, // <0,3,3,7>: Cost 4 vmrglw <3,2,0,3>, <2,6,3,7>
+ 2216725278U, // <0,3,3,u>: Cost 3 vmrghw <0,3,1,u>, <3,u,1,2>
+ 2217363606U, // <0,3,4,0>: Cost 3 vmrghw <0,4,1,5>, <3,0,1,2>
+ 3291105510U, // <0,3,4,1>: Cost 4 vmrghw <0,4,1,5>, <3,1,1,1>
+ 3291105601U, // <0,3,4,2>: Cost 4 vmrghw <0,4,1,5>, <3,2,2,2>
+ 2217363868U, // <0,3,4,3>: Cost 3 vmrghw <0,4,1,5>, <3,3,3,3>
+ 2217363970U, // <0,3,4,4>: Cost 3 vmrghw <0,4,1,5>, <3,4,5,6>
+ 2758986242U, // <0,3,4,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,4,5,6>
+ 3727077685U, // <0,3,4,6>: Cost 4 vsldoi4 <6,0,3,4>, <6,0,3,4>
+ 3364767674U, // <0,3,4,7>: Cost 4 vmrglw <1,5,0,4>, <2,6,3,7>
+ 2217364254U, // <0,3,4,u>: Cost 3 vmrghw <0,4,1,5>, <3,u,1,2>
+ 3832728102U, // <0,3,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <3,5,0,6>
+ 3405916003U, // <0,3,5,1>: Cost 4 vmrglw <u,4,0,5>, <2,5,3,1>
+ 3376055840U, // <0,3,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,1,3,2>
+ 3376055679U, // <0,3,5,3>: Cost 4 vmrglw <3,4,0,5>, <1,u,3,3>
+ 3376055194U, // <0,3,5,4>: Cost 4 vmrglw <3,4,0,5>, <1,2,3,4>
+ 3859565138U, // <0,3,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <3,5,5,5>
+ 2727514210U, // <0,3,5,6>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 3376056250U, // <0,3,5,7>: Cost 4 vmrglw <3,4,0,5>, <2,6,3,7>
+ 2727514210U, // <0,3,5,u>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 2758986360U, // <0,3,6,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 3709174678U, // <0,3,6,1>: Cost 4 vsldoi4 <3,0,3,6>, <1,2,3,0>
+ 3795284411U, // <0,3,6,2>: Cost 4 vsldoi8 <6,2,0,3>, <6,2,0,3>
+ 3709175980U, // <0,3,6,3>: Cost 4 vsldoi4 <3,0,3,6>, <3,0,3,6>
+ 3833096860U, // <0,3,6,4>: Cost 4 vsldoi12 <1,2,u,0>, <3,6,4,7>
+ 3376728235U, // <0,3,6,5>: Cost 5 vmrglw <3,5,0,6>, <3,0,3,5>
+ 3859565229U, // <0,3,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <3,6,6,6>
+ 2773879472U, // <0,3,6,7>: Cost 3 vsldoi12 <3,6,7,0>, <3,6,7,0>
+ 2758986360U, // <0,3,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 2303656854U, // <0,3,7,0>: Cost 3 vmrglw <3,6,0,7>, <1,2,3,0>
+ 3807229018U, // <0,3,7,1>: Cost 4 vsldoi8 <u,2,0,3>, <7,1,2,u>
+ 2727515284U, // <0,3,7,2>: Cost 3 vsldoi8 <7,2,0,3>, <7,2,0,3>
+ 3377399410U, // <0,3,7,3>: Cost 4 vmrglw <3,6,0,7>, <2,2,3,3>
+ 3377398682U, // <0,3,7,4>: Cost 4 vmrglw <3,6,0,7>, <1,2,3,4>
+ 3801257409U, // <0,3,7,5>: Cost 4 vsldoi8 <7,2,0,3>, <7,5,6,7>
+ 3377399980U, // <0,3,7,6>: Cost 4 vmrglw <3,6,0,7>, <3,0,3,6>
+ 3375409082U, // <0,3,7,7>: Cost 4 vmrglw <3,3,0,7>, <2,6,3,7>
+ 2731497082U, // <0,3,7,u>: Cost 3 vsldoi8 <7,u,0,3>, <7,u,0,3>
+ 1146333334U, // <0,3,u,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2220075238U, // <0,3,u,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2220075329U, // <0,3,u,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1146333596U, // <0,3,u,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1146333698U, // <0,3,u,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2758986566U, // <0,3,u,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,u,5,6>
+ 2803739472U, // <0,3,u,6>: Cost 3 vsldoi12 <u,6,7,0>, <3,u,6,7>
+ 2295703482U, // <0,3,u,7>: Cost 3 vmrglw <2,3,0,u>, <2,6,3,7>
+ 1146333982U, // <0,3,u,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2214595473U, // <0,4,0,0>: Cost 3 vmrghw <0,0,0,0>, <4,0,5,0>
+ 2693677158U, // <0,4,0,1>: Cost 3 vsldoi8 <1,5,0,4>, LHS
+ 3839437689U, // <0,4,0,2>: Cost 4 vsldoi12 <2,3,4,0>, <4,0,2,3>
+ 3709200559U, // <0,4,0,3>: Cost 4 vsldoi4 <3,0,4,0>, <3,0,4,0>
+ 2693677394U, // <0,4,0,4>: Cost 3 vsldoi8 <1,5,0,4>, <0,4,1,5>
+ 1140854070U, // <0,4,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 3767419409U, // <0,4,0,6>: Cost 4 vsldoi8 <1,5,0,4>, <0,6,4,7>
+ 3854109604U, // <0,4,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,0,7,1>
+ 1140854313U, // <0,4,0,u>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 1141689234U, // <0,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2215431114U, // <0,4,1,1>: Cost 3 vmrghw LHS, <4,1,2,3>
+ 2215431221U, // <0,4,1,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635466928U, // <0,4,1,3>: Cost 3 vsldoi4 <3,0,4,1>, <3,0,4,1>
+ 1141689552U, // <0,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 67947830U, // <0,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2215431545U, // <0,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2659357716U, // <0,4,1,7>: Cost 3 vsldoi4 <7,0,4,1>, <7,0,4,1>
+ 67948073U, // <0,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 3767420369U, // <0,4,2,0>: Cost 4 vsldoi8 <1,5,0,4>, <2,0,3,4>
+ 3767420451U, // <0,4,2,1>: Cost 4 vsldoi8 <1,5,0,4>, <2,1,3,5>
+ 3767420520U, // <0,4,2,2>: Cost 4 vsldoi8 <1,5,0,4>, <2,2,2,2>
+ 2698323625U, // <0,4,2,3>: Cost 3 vsldoi8 <2,3,0,4>, <2,3,0,4>
+ 3709218102U, // <0,4,2,4>: Cost 4 vsldoi4 <3,0,4,2>, RHS
+ 2216013110U, // <0,4,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767420858U, // <0,4,2,6>: Cost 4 vsldoi8 <1,5,0,4>, <2,6,3,7>
+ 3774719981U, // <0,4,2,7>: Cost 4 vsldoi8 <2,7,0,4>, <2,7,0,4>
+ 2216013353U, // <0,4,2,u>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767421078U, // <0,4,3,0>: Cost 4 vsldoi8 <1,5,0,4>, <3,0,1,2>
+ 3776710880U, // <0,4,3,1>: Cost 4 vsldoi8 <3,1,0,4>, <3,1,0,4>
+ 3833097325U, // <0,4,3,2>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,2,4>
+ 3767421340U, // <0,4,3,3>: Cost 4 vsldoi8 <1,5,0,4>, <3,3,3,3>
+ 3767421442U, // <0,4,3,4>: Cost 4 vsldoi8 <1,5,0,4>, <3,4,5,6>
+ 2216660278U, // <0,4,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 3833097361U, // <0,4,3,6>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,6,4>
+ 3780692678U, // <0,4,3,7>: Cost 4 vsldoi8 <3,7,0,4>, <3,7,0,4>
+ 2216660521U, // <0,4,3,u>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2617573416U, // <0,4,4,0>: Cost 3 vsldoi4 <0,0,4,4>, <0,0,4,4>
+ 2217364450U, // <0,4,4,1>: Cost 3 vmrghw <0,4,1,5>, <4,1,5,0>
+ 3691316771U, // <0,4,4,2>: Cost 4 vsldoi4 <0,0,4,4>, <2,1,3,5>
+ 3709233331U, // <0,4,4,3>: Cost 4 vsldoi4 <3,0,4,4>, <3,0,4,4>
+ 2785823952U, // <0,4,4,4>: Cost 3 vsldoi12 <5,6,7,0>, <4,4,4,4>
+ 1143622966U, // <0,4,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 3691319723U, // <0,4,4,6>: Cost 4 vsldoi4 <0,0,4,4>, <6,1,7,5>
+ 3854109932U, // <0,4,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,4,7,5>
+ 1143623209U, // <0,4,4,u>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2635497574U, // <0,4,5,0>: Cost 3 vsldoi4 <3,0,4,5>, LHS
+ 2635498390U, // <0,4,5,1>: Cost 3 vsldoi4 <3,0,4,5>, <1,2,3,0>
+ 3709240936U, // <0,4,5,2>: Cost 4 vsldoi4 <3,0,4,5>, <2,2,2,2>
+ 2635499700U, // <0,4,5,3>: Cost 3 vsldoi4 <3,0,4,5>, <3,0,4,5>
+ 2635500854U, // <0,4,5,4>: Cost 3 vsldoi4 <3,0,4,5>, RHS
+ 2785824044U, // <0,4,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <4,5,5,6>
+ 1685245238U, // <0,4,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659390488U, // <0,4,5,7>: Cost 3 vsldoi4 <7,0,4,5>, <7,0,4,5>
+ 1685245256U, // <0,4,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 3839438161U, // <0,4,6,0>: Cost 4 vsldoi12 <2,3,4,0>, <4,6,0,7>
+ 3798610347U, // <0,4,6,1>: Cost 4 vsldoi8 <6,7,0,4>, <6,1,7,5>
+ 3798610426U, // <0,4,6,2>: Cost 4 vsldoi8 <6,7,0,4>, <6,2,7,3>
+ 3795956237U, // <0,4,6,3>: Cost 4 vsldoi8 <6,3,0,4>, <6,3,0,4>
+ 3733138742U, // <0,4,6,4>: Cost 4 vsldoi4 <7,0,4,6>, RHS
+ 2218634550U, // <0,4,6,5>: Cost 3 vmrghw <0,6,0,7>, RHS
+ 3798610744U, // <0,4,6,6>: Cost 4 vsldoi8 <6,7,0,4>, <6,6,6,6>
+ 2724868945U, // <0,4,6,7>: Cost 3 vsldoi8 <6,7,0,4>, <6,7,0,4>
+ 2725532578U, // <0,4,6,u>: Cost 3 vsldoi8 <6,u,0,4>, <6,u,0,4>
+ 3383371465U, // <0,4,7,0>: Cost 4 vmrglw <4,6,0,7>, <2,3,4,0>
+ 3800601668U, // <0,4,7,1>: Cost 4 vsldoi8 <7,1,0,4>, <7,1,0,4>
+ 3775386826U, // <0,4,7,2>: Cost 5 vsldoi8 <2,u,0,4>, <7,2,6,3>
+ 3801928934U, // <0,4,7,3>: Cost 4 vsldoi8 <7,3,0,4>, <7,3,0,4>
+ 3721202998U, // <0,4,7,4>: Cost 4 vsldoi4 <5,0,4,7>, RHS
+ 2780368328U, // <0,4,7,5>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 3383372686U, // <0,4,7,6>: Cost 5 vmrglw <4,6,0,7>, <4,0,4,6>
+ 3854110170U, // <0,4,7,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,7,7,0>
+ 2780368328U, // <0,4,7,u>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 1146334098U, // <0,4,u,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2220076002U, // <0,4,u,1>: Cost 3 vmrghw LHS, <4,1,5,0>
+ 2220076085U, // <0,4,u,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635524279U, // <0,4,u,3>: Cost 3 vsldoi4 <3,0,4,u>, <3,0,4,u>
+ 1146334416U, // <0,4,u,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 72592694U, // <0,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 1685245481U, // <0,4,u,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659415067U, // <0,4,u,7>: Cost 3 vsldoi4 <7,0,4,u>, <7,0,4,u>
+ 72592937U, // <0,4,u,u>: Cost 1 vmrghw LHS, RHS
+ 2281704337U, // <0,5,0,0>: Cost 3 vmrglw <0,0,0,0>, <4,0,5,0>
+ 2704965734U, // <0,5,0,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 3778707666U, // <0,5,0,2>: Cost 4 vsldoi8 <3,4,0,5>, <0,2,5,3>
+ 3778707708U, // <0,5,0,3>: Cost 4 vsldoi8 <3,4,0,5>, <0,3,1,0>
+ 2687050057U, // <0,5,0,4>: Cost 3 vsldoi8 <0,4,0,5>, <0,4,0,5>
+ 2214596612U, // <0,5,0,5>: Cost 3 vmrghw <0,0,0,0>, <5,5,5,5>
+ 2785824372U, // <0,5,0,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,0,6,1>
+ 3854110332U, // <0,5,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <5,0,7,0>
+ 2704966301U, // <0,5,0,u>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 1567768678U, // <0,5,1,0>: Cost 2 vsldoi4 <4,0,5,1>, LHS
+ 2312236570U, // <0,5,1,1>: Cost 3 vmrglw <5,1,0,1>, <4,u,5,1>
+ 2215431915U, // <0,5,1,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641512598U, // <0,5,1,3>: Cost 3 vsldoi4 <4,0,5,1>, <3,0,1,2>
+ 1567771538U, // <0,5,1,4>: Cost 2 vsldoi4 <4,0,5,1>, <4,0,5,1>
+ 1141690372U, // <0,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1141690466U, // <0,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2641515514U, // <0,5,1,7>: Cost 3 vsldoi4 <4,0,5,1>, <7,0,1,2>
+ 1141690615U, // <0,5,1,u>: Cost 2 vmrghw LHS, <5,u,5,5>
+ 3772736973U, // <0,5,2,0>: Cost 4 vsldoi8 <2,4,0,5>, <2,0,3,0>
+ 3778709024U, // <0,5,2,1>: Cost 4 vsldoi8 <3,4,0,5>, <2,1,3,2>
+ 3778709096U, // <0,5,2,2>: Cost 4 vsldoi8 <3,4,0,5>, <2,2,2,2>
+ 3778709158U, // <0,5,2,3>: Cost 4 vsldoi8 <3,4,0,5>, <2,3,0,1>
+ 3772737275U, // <0,5,2,4>: Cost 4 vsldoi8 <2,4,0,5>, <2,4,0,5>
+ 3859566351U, // <0,5,2,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,2,5,3>
+ 3778709434U, // <0,5,2,6>: Cost 4 vsldoi8 <3,4,0,5>, <2,6,3,7>
+ 3805251562U, // <0,5,2,7>: Cost 4 vsldoi8 <7,u,0,5>, <2,7,0,1>
+ 3775391807U, // <0,5,2,u>: Cost 4 vsldoi8 <2,u,0,5>, <2,u,0,5>
+ 2704967830U, // <0,5,3,0>: Cost 3 vsldoi8 <3,4,0,5>, <3,0,1,2>
+ 3776719073U, // <0,5,3,1>: Cost 4 vsldoi8 <3,1,0,5>, <3,1,0,5>
+ 3777382706U, // <0,5,3,2>: Cost 4 vsldoi8 <3,2,0,5>, <3,2,0,5>
+ 3778709887U, // <0,5,3,3>: Cost 4 vsldoi8 <3,4,0,5>, <3,3,0,1>
+ 2704968148U, // <0,5,3,4>: Cost 3 vsldoi8 <3,4,0,5>, <3,4,0,5>
+ 3857428317U, // <0,5,3,5>: Cost 4 vsldoi12 <5,3,5,0>, <5,3,5,0>
+ 3364096514U, // <0,5,3,6>: Cost 4 vmrglw <1,4,0,3>, <3,4,5,6>
+ 3780700871U, // <0,5,3,7>: Cost 4 vsldoi8 <3,7,0,5>, <3,7,0,5>
+ 2707622680U, // <0,5,3,u>: Cost 3 vsldoi8 <3,u,0,5>, <3,u,0,5>
+ 2728856466U, // <0,5,4,0>: Cost 3 vsldoi8 <7,4,0,5>, <4,0,5,1>
+ 3697361674U, // <0,5,4,1>: Cost 4 vsldoi4 <1,0,5,4>, <1,0,5,4>
+ 3697362601U, // <0,5,4,2>: Cost 4 vsldoi4 <1,0,5,4>, <2,3,0,4>
+ 3364766635U, // <0,5,4,3>: Cost 4 vmrglw <1,5,0,4>, <1,2,5,3>
+ 2217365428U, // <0,5,4,4>: Cost 3 vmrghw <0,4,1,5>, <5,4,5,6>
+ 2704969014U, // <0,5,4,5>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 2785824700U, // <0,5,4,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,4,6,5>
+ 3364766963U, // <0,5,4,7>: Cost 4 vmrglw <1,5,0,4>, <1,6,5,7>
+ 2704969257U, // <0,5,4,u>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 3846148050U, // <0,5,5,0>: Cost 4 vsldoi12 <3,4,5,0>, <5,5,0,0>
+ 2326203282U, // <0,5,5,1>: Cost 3 vmrglw <7,4,0,5>, <4,0,5,1>
+ 3291746027U, // <0,5,5,2>: Cost 4 vmrghw <0,5,1,2>, <5,2,1,3>
+ 3376054482U, // <0,5,5,3>: Cost 4 vmrglw <3,4,0,5>, <0,2,5,3>
+ 3790655366U, // <0,5,5,4>: Cost 4 vsldoi8 <5,4,0,5>, <5,4,0,5>
+ 2785824772U, // <0,5,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <5,5,5,5>
+ 2724876386U, // <0,5,5,6>: Cost 3 vsldoi8 <6,7,0,5>, <5,6,7,0>
+ 3858903057U, // <0,5,5,7>: Cost 4 vsldoi12 <5,5,7,0>, <5,5,7,0>
+ 2736820484U, // <0,5,5,u>: Cost 3 vsldoi8 <u,7,0,5>, <5,u,7,0>
+ 2659467366U, // <0,5,6,0>: Cost 3 vsldoi4 <7,0,5,6>, LHS
+ 3859566643U, // <0,5,6,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,6,1,7>
+ 3798618618U, // <0,5,6,2>: Cost 4 vsldoi8 <6,7,0,5>, <6,2,7,3>
+ 3852857410U, // <0,5,6,3>: Cost 4 vsldoi12 <4,5,6,0>, <5,6,3,4>
+ 2659470646U, // <0,5,6,4>: Cost 3 vsldoi4 <7,0,5,6>, RHS
+ 2659471458U, // <0,5,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 3832729696U, // <0,5,6,6>: Cost 4 vsldoi12 <1,2,3,0>, <5,6,6,7>
+ 1712083042U, // <0,5,6,7>: Cost 2 vsldoi12 <5,6,7,0>, <5,6,7,0>
+ 1712156779U, // <0,5,6,u>: Cost 2 vsldoi12 <5,6,u,0>, <5,6,u,0>
+ 2731512826U, // <0,5,7,0>: Cost 3 vsldoi8 <7,u,0,5>, <7,0,1,2>
+ 3859566717U, // <0,5,7,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,1,0>
+ 3798619284U, // <0,5,7,2>: Cost 4 vsldoi8 <6,7,0,5>, <7,2,0,3>
+ 3778712803U, // <0,5,7,3>: Cost 4 vsldoi8 <3,4,0,5>, <7,3,0,1>
+ 2728858936U, // <0,5,7,4>: Cost 3 vsldoi8 <7,4,0,5>, <7,4,0,5>
+ 3859566753U, // <0,5,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,5,0>
+ 3377398135U, // <0,5,7,6>: Cost 4 vmrglw <3,6,0,7>, <0,4,5,6>
+ 3798619686U, // <0,5,7,7>: Cost 4 vsldoi8 <6,7,0,5>, <7,7,0,0>
+ 2731513468U, // <0,5,7,u>: Cost 3 vsldoi8 <7,u,0,5>, <7,u,0,5>
+ 1567826022U, // <0,5,u,0>: Cost 2 vsldoi4 <4,0,5,u>, LHS
+ 2704971566U, // <0,5,u,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 2220076779U, // <0,5,u,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641569942U, // <0,5,u,3>: Cost 3 vsldoi4 <4,0,5,u>, <3,0,1,2>
+ 1567828889U, // <0,5,u,4>: Cost 2 vsldoi4 <4,0,5,u>, <4,0,5,u>
+ 1146335236U, // <0,5,u,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1146335330U, // <0,5,u,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 1713410308U, // <0,5,u,7>: Cost 2 vsldoi12 <5,u,7,0>, <5,u,7,0>
+ 1713484045U, // <0,5,u,u>: Cost 2 vsldoi12 <5,u,u,0>, <5,u,u,0>
+ 2214596949U, // <0,6,0,0>: Cost 3 vmrghw <0,0,0,0>, <6,0,7,0>
+ 2214678951U, // <0,6,0,1>: Cost 3 vmrghw <0,0,1,1>, <6,1,7,1>
+ 2214597114U, // <0,6,0,2>: Cost 3 vmrghw <0,0,0,0>, <6,2,7,3>
+ 3852857653U, // <0,6,0,3>: Cost 4 vsldoi12 <4,5,6,0>, <6,0,3,4>
+ 3832729919U, // <0,6,0,4>: Cost 4 vsldoi12 <1,2,3,0>, <6,0,4,5>
+ 3721293427U, // <0,6,0,5>: Cost 4 vsldoi4 <5,0,6,0>, <5,0,6,0>
+ 2214597432U, // <0,6,0,6>: Cost 3 vmrghw <0,0,0,0>, <6,6,6,6>
+ 1207962934U, // <0,6,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 1207962935U, // <0,6,0,u>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 2215432481U, // <0,6,1,0>: Cost 3 vmrghw LHS, <6,0,1,2>
+ 2215432615U, // <0,6,1,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1141690874U, // <0,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2215432754U, // <0,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2215432817U, // <0,6,1,4>: Cost 3 vmrghw LHS, <6,4,2,5>
+ 2215432939U, // <0,6,1,5>: Cost 3 vmrghw LHS, <6,5,7,1>
+ 1141691192U, // <0,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221905718U, // <0,6,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 1221905719U, // <0,6,1,u>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 3852857787U, // <0,6,2,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,2,0,3>
+ 3289764265U, // <0,6,2,1>: Cost 4 vmrghw <0,2,1,3>, <6,1,7,3>
+ 3289690618U, // <0,6,2,2>: Cost 4 vmrghw <0,2,0,3>, <6,2,7,3>
+ 3862589907U, // <0,6,2,3>: Cost 4 vsldoi12 <6,2,3,0>, <6,2,3,0>
+ 3733253430U, // <0,6,2,4>: Cost 4 vsldoi4 <7,0,6,2>, RHS
+ 3733254242U, // <0,6,2,5>: Cost 4 vsldoi4 <7,0,6,2>, <5,6,7,0>
+ 3777390522U, // <0,6,2,6>: Cost 4 vsldoi8 <3,2,0,6>, <2,6,3,7>
+ 2785825274U, // <0,6,2,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,7,3>
+ 2785825283U, // <0,6,2,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,u,3>
+ 3777390742U, // <0,6,3,0>: Cost 4 vsldoi8 <3,2,0,6>, <3,0,1,2>
+ 3863106066U, // <0,6,3,1>: Cost 4 vsldoi12 <6,3,1,0>, <6,3,1,0>
+ 3777390899U, // <0,6,3,2>: Cost 4 vsldoi8 <3,2,0,6>, <3,2,0,6>
+ 3290436146U, // <0,6,3,3>: Cost 4 vmrghw <0,3,1,4>, <6,3,4,5>
+ 3779381762U, // <0,6,3,4>: Cost 4 vsldoi8 <3,5,0,6>, <3,4,5,6>
+ 3779381798U, // <0,6,3,5>: Cost 4 vsldoi8 <3,5,0,6>, <3,5,0,6>
+ 3733262920U, // <0,6,3,6>: Cost 4 vsldoi4 <7,0,6,3>, <6,3,7,0>
+ 2300972342U, // <0,6,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2300972343U, // <0,6,3,u>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 3802606482U, // <0,6,4,0>: Cost 4 vsldoi8 <7,4,0,6>, <4,0,5,1>
+ 2217365931U, // <0,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2217366010U, // <0,6,4,2>: Cost 3 vmrghw <0,4,1,5>, <6,2,7,3>
+ 3291107890U, // <0,6,4,3>: Cost 4 vmrghw <0,4,1,5>, <6,3,4,5>
+ 3291099805U, // <0,6,4,4>: Cost 4 vmrghw <0,4,1,4>, <6,4,7,4>
+ 3777391926U, // <0,6,4,5>: Cost 4 vsldoi8 <3,2,0,6>, RHS
+ 2217366328U, // <0,6,4,6>: Cost 3 vmrghw <0,4,1,5>, <6,6,6,6>
+ 2291027254U, // <0,6,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 2291027255U, // <0,6,4,u>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 3852858033U, // <0,6,5,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,5,0,6>
+ 3395964532U, // <0,6,5,1>: Cost 4 vmrglw <6,7,0,5>, <5,0,6,1>
+ 3864507069U, // <0,6,5,2>: Cost 4 vsldoi12 <6,5,2,0>, <6,5,2,0>
+ 3376056678U, // <0,6,5,3>: Cost 5 vmrglw <3,4,0,5>, <3,2,6,3>
+ 3721334070U, // <0,6,5,4>: Cost 4 vsldoi4 <5,0,6,5>, RHS
+ 3395964860U, // <0,6,5,5>: Cost 4 vmrglw <6,7,0,5>, <5,4,6,5>
+ 3864802017U, // <0,6,5,6>: Cost 4 vsldoi12 <6,5,6,0>, <6,5,6,0>
+ 2302315830U, // <0,6,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 2302315831U, // <0,6,5,u>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 3852858108U, // <0,6,6,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,6,0,0>
+ 3398624745U, // <0,6,6,1>: Cost 4 vmrglw <7,2,0,6>, <2,0,6,1>
+ 2218668538U, // <0,6,6,2>: Cost 3 vmrghw <0,6,1,2>, <6,2,7,3>
+ 3292418610U, // <0,6,6,3>: Cost 4 vmrghw <0,6,1,3>, <6,3,4,5>
+ 3733286198U, // <0,6,6,4>: Cost 4 vsldoi4 <7,0,6,6>, RHS
+ 3797299889U, // <0,6,6,5>: Cost 4 vsldoi8 <6,5,0,6>, <6,5,0,6>
+ 2785825592U, // <0,6,6,6>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,6,6>
+ 2785825602U, // <0,6,6,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,7,7>
+ 2785825611U, // <0,6,6,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,u,7>
+ 2785825614U, // <0,6,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,0,1>
+ 2758988632U, // <0,6,7,1>: Cost 3 vsldoi12 <1,2,3,0>, <6,7,1,2>
+ 3377400084U, // <0,6,7,2>: Cost 4 vmrglw <3,6,0,7>, <3,1,6,2>
+ 2792166248U, // <0,6,7,3>: Cost 3 vsldoi12 <6,7,3,0>, <6,7,3,0>
+ 2785825654U, // <0,6,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,4,5>
+ 2785825664U, // <0,6,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 3859567493U, // <0,6,7,6>: Cost 4 vsldoi12 <5,6,7,0>, <6,7,6,2>
+ 2303659318U, // <0,6,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303659319U, // <0,6,7,u>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2785825695U, // <0,6,u,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,0,1>
+ 2220077479U, // <0,6,u,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1146335738U, // <0,6,u,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2792829881U, // <0,6,u,3>: Cost 3 vsldoi12 <6,u,3,0>, <6,u,3,0>
+ 2785825735U, // <0,6,u,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,4,5>
+ 2785825664U, // <0,6,u,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 1146336056U, // <0,6,u,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221963062U, // <0,6,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 1221963063U, // <0,6,u,u>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 2653593600U, // <0,7,0,0>: Cost 3 vsldoi4 <6,0,7,0>, <0,0,0,0>
+ 2706309222U, // <0,7,0,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 3709421498U, // <0,7,0,2>: Cost 4 vsldoi4 <3,0,7,0>, <2,6,3,7>
+ 2281705978U, // <0,7,0,3>: Cost 3 vmrglw <0,0,0,0>, <6,2,7,3>
+ 2785825816U, // <0,7,0,4>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,4,5>
+ 2785825826U, // <0,7,0,5>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,5,6>
+ 2653598037U, // <0,7,0,6>: Cost 3 vsldoi4 <6,0,7,0>, <6,0,7,0>
+ 2214598252U, // <0,7,0,7>: Cost 3 vmrghw <0,0,0,0>, <7,7,7,7>
+ 2706309789U, // <0,7,0,u>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 1141691386U, // <0,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2215433290U, // <0,7,1,1>: Cost 3 vmrghw LHS, <7,1,1,1>
+ 2706310038U, // <0,7,1,2>: Cost 3 vsldoi8 <3,6,0,7>, <1,2,3,0>
+ 2322190842U, // <0,7,1,3>: Cost 3 vmrglw <6,7,0,1>, <6,2,7,3>
+ 1141691750U, // <0,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2215433654U, // <0,7,1,5>: Cost 3 vmrghw LHS, <7,5,5,5>
+ 2653606230U, // <0,7,1,6>: Cost 3 vsldoi4 <6,0,7,1>, <6,0,7,1>
+ 1141692012U, // <0,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1141692034U, // <0,7,1,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 2785825940U, // <0,7,2,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,2,0,3>
+ 3768108576U, // <0,7,2,1>: Cost 5 vsldoi8 <1,6,0,7>, <2,1,3,2>
+ 3780052584U, // <0,7,2,2>: Cost 4 vsldoi8 <3,6,0,7>, <2,2,2,2>
+ 2794820780U, // <0,7,2,3>: Cost 3 vsldoi12 <7,2,3,0>, <7,2,3,0>
+ 3859641528U, // <0,7,2,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,2,4,3>
+ 3733327970U, // <0,7,2,5>: Cost 4 vsldoi4 <7,0,7,2>, <5,6,7,0>
+ 3778062266U, // <0,7,2,6>: Cost 4 vsldoi8 <3,3,0,7>, <2,6,3,7>
+ 3733328944U, // <0,7,2,7>: Cost 4 vsldoi4 <7,0,7,2>, <7,0,7,2>
+ 2795189465U, // <0,7,2,u>: Cost 3 vsldoi12 <7,2,u,0>, <7,2,u,0>
+ 2324861026U, // <0,7,3,0>: Cost 3 vmrglw <7,2,0,3>, <5,6,7,0>
+ 3780053233U, // <0,7,3,1>: Cost 4 vsldoi8 <3,6,0,7>, <3,1,2,3>
+ 3780053296U, // <0,7,3,2>: Cost 4 vsldoi8 <3,6,0,7>, <3,2,0,3>
+ 3778062725U, // <0,7,3,3>: Cost 4 vsldoi8 <3,3,0,7>, <3,3,0,7>
+ 3780053506U, // <0,7,3,4>: Cost 4 vsldoi8 <3,6,0,7>, <3,4,5,6>
+ 3803941469U, // <0,7,3,5>: Cost 4 vsldoi8 <7,6,0,7>, <3,5,6,7>
+ 2706311800U, // <0,7,3,6>: Cost 3 vsldoi8 <3,6,0,7>, <3,6,0,7>
+ 3398603586U, // <0,7,3,7>: Cost 4 vmrglw <7,2,0,3>, <6,6,7,7>
+ 2707639066U, // <0,7,3,u>: Cost 3 vsldoi8 <3,u,0,7>, <3,u,0,7>
+ 2217366522U, // <0,7,4,0>: Cost 3 vmrghw <0,4,1,5>, <7,0,1,2>
+ 3727369110U, // <0,7,4,1>: Cost 4 vsldoi4 <6,0,7,4>, <1,2,3,0>
+ 3291108500U, // <0,7,4,2>: Cost 4 vmrghw <0,4,1,5>, <7,2,0,3>
+ 3727370872U, // <0,7,4,3>: Cost 4 vsldoi4 <6,0,7,4>, <3,6,0,7>
+ 2217366886U, // <0,7,4,4>: Cost 3 vmrghw <0,4,1,5>, <7,4,5,6>
+ 2706312502U, // <0,7,4,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 3786026321U, // <0,7,4,6>: Cost 4 vsldoi8 <4,6,0,7>, <4,6,0,7>
+ 2217367148U, // <0,7,4,7>: Cost 3 vmrghw <0,4,1,5>, <7,7,7,7>
+ 2706312745U, // <0,7,4,u>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2322223202U, // <0,7,5,0>: Cost 3 vmrglw <6,7,0,5>, <5,6,7,0>
+ 3399946987U, // <0,7,5,1>: Cost 4 vmrglw <7,4,0,5>, <6,5,7,1>
+ 3291780244U, // <0,7,5,2>: Cost 4 vmrghw <0,5,1,6>, <7,2,0,3>
+ 3727378582U, // <0,7,5,3>: Cost 4 vsldoi4 <6,0,7,5>, <3,0,1,2>
+ 3727379766U, // <0,7,5,4>: Cost 4 vsldoi4 <6,0,7,5>, RHS
+ 3859568054U, // <0,7,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,5,5,5>
+ 2785826241U, // <0,7,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <7,5,6,7>
+ 3395965762U, // <0,7,5,7>: Cost 4 vmrglw <6,7,0,5>, <6,6,7,7>
+ 2787153363U, // <0,7,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <7,5,u,7>
+ 2785826268U, // <0,7,6,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,6,0,7>
+ 3780055420U, // <0,7,6,1>: Cost 5 vsldoi8 <3,6,0,7>, <6,1,2,3>
+ 3859568110U, // <0,7,6,2>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,2,7>
+ 3874534903U, // <0,7,6,3>: Cost 4 vsldoi12 <u,2,3,0>, <7,6,3,7>
+ 3859641856U, // <0,7,6,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,6,4,7>
+ 3733360738U, // <0,7,6,5>: Cost 4 vsldoi4 <7,0,7,6>, <5,6,7,0>
+ 3859568145U, // <0,7,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,6,6>
+ 2797770260U, // <0,7,6,7>: Cost 3 vsldoi12 <7,6,7,0>, <7,6,7,0>
+ 2797843997U, // <0,7,6,u>: Cost 3 vsldoi12 <7,6,u,0>, <7,6,u,0>
+ 2785826342U, // <0,7,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,0,0>
+ 3727393686U, // <0,7,7,1>: Cost 4 vsldoi4 <6,0,7,7>, <1,2,3,0>
+ 3868563003U, // <0,7,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <7,7,2,3>
+ 3377397988U, // <0,7,7,3>: Cost 4 vmrglw <3,6,0,7>, <0,2,7,3>
+ 2219349350U, // <0,7,7,4>: Cost 3 vmrghw <0,7,1,4>, <7,4,5,6>
+ 3859568217U, // <0,7,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,7,5,6>
+ 2730202588U, // <0,7,7,6>: Cost 3 vsldoi8 <7,6,0,7>, <7,6,0,7>
+ 2785826412U, // <0,7,7,7>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,7,7>
+ 2731529854U, // <0,7,7,u>: Cost 3 vsldoi8 <7,u,0,7>, <7,u,0,7>
+ 1146336250U, // <0,7,u,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2706315054U, // <0,7,u,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 2653660845U, // <0,7,u,2>: Cost 3 vsldoi4 <6,0,7,u>, <2,3,0,u>
+ 2322248186U, // <0,7,u,3>: Cost 3 vmrglw <6,7,0,u>, <6,2,7,3>
+ 1146336614U, // <0,7,u,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2706315418U, // <0,7,u,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2653663581U, // <0,7,u,6>: Cost 3 vsldoi4 <6,0,7,u>, <6,0,7,u>
+ 1146336876U, // <0,7,u,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1146336898U, // <0,7,u,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 202162278U, // <0,u,0,0>: Cost 1 vspltisw0 LHS
+ 1624612966U, // <0,u,0,1>: Cost 2 vsldoi8 <2,3,0,u>, LHS
+ 2629780986U, // <0,u,0,2>: Cost 3 vsldoi4 <2,0,u,0>, <2,0,u,0>
+ 1207959708U, // <0,u,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 1544097078U, // <0,u,0,4>: Cost 2 vsldoi4 <0,0,u,0>, RHS
+ 1140856986U, // <0,u,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 2698355253U, // <0,u,0,6>: Cost 3 vsldoi8 <2,3,0,u>, <0,6,u,7>
+ 1207962952U, // <0,u,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 202162278U, // <0,u,0,u>: Cost 1 vspltisw0 LHS
+ 1142134483U, // <0,u,1,0>: Cost 2 vmrghw LHS, <u,0,1,2>
+ 67950382U, // <0,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 1142175624U, // <0,u,1,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 1142175676U, // <0,u,1,3>: Cost 2 vmrghw LHS, <u,3,0,1>
+ 1142134847U, // <0,u,1,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 67950746U, // <0,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1142175952U, // <0,u,1,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221905736U, // <0,u,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 67950949U, // <0,u,1,u>: Cost 1 vmrghw LHS, LHS
+ 1562026086U, // <0,u,2,0>: Cost 2 vsldoi4 <3,0,u,2>, LHS
+ 2216015662U, // <0,u,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2698356328U, // <0,u,2,2>: Cost 3 vsldoi8 <2,3,0,u>, <2,2,2,2>
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1562029366U, // <0,u,2,4>: Cost 2 vsldoi4 <3,0,u,2>, RHS
+ 2216016026U, // <0,u,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 2698356666U, // <0,u,2,6>: Cost 3 vsldoi8 <2,3,0,u>, <2,6,3,7>
+ 1585919033U, // <0,u,2,7>: Cost 2 vsldoi4 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2758989756U, // <0,u,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,0,1>
+ 2216662830U, // <0,u,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2703665461U, // <0,u,3,2>: Cost 3 vsldoi8 <3,2,0,u>, <3,2,0,u>
+ 2758989782U, // <0,u,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,3,0>
+ 2758989796U, // <0,u,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,4,5>
+ 2216663194U, // <0,u,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2706319993U, // <0,u,3,6>: Cost 3 vsldoi8 <3,6,0,u>, <3,6,0,u>
+ 2300972360U, // <0,u,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2216663397U, // <0,u,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217367251U, // <0,u,4,0>: Cost 3 vmrghw <0,4,1,5>, <u,0,1,2>
+ 1143625518U, // <0,u,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2217367432U, // <0,u,4,2>: Cost 3 vmrghw <0,4,1,5>, <u,2,3,3>
+ 2217367484U, // <0,u,4,3>: Cost 3 vmrghw <0,4,1,5>, <u,3,0,1>
+ 1143619922U, // <0,u,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1143625882U, // <0,u,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2217367760U, // <0,u,4,6>: Cost 3 vmrghw <0,4,1,5>, <u,6,3,7>
+ 2291027272U, // <0,u,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 1143626085U, // <0,u,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2635792486U, // <0,u,5,0>: Cost 3 vsldoi4 <3,0,u,5>, LHS
+ 2635793302U, // <0,u,5,1>: Cost 3 vsldoi4 <3,0,u,5>, <1,2,3,0>
+ 2302314646U, // <0,u,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 2635794648U, // <0,u,5,3>: Cost 3 vsldoi4 <3,0,u,5>, <3,0,u,5>
+ 2635795766U, // <0,u,5,4>: Cost 3 vsldoi4 <3,0,u,5>, RHS
+ 2717601754U, // <0,u,5,5>: Cost 3 vsldoi8 <5,5,0,u>, <5,5,0,u>
+ 1685248154U, // <0,u,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2302315848U, // <0,u,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 1685248172U, // <0,u,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2759358645U, // <0,u,6,0>: Cost 3 vsldoi12 <1,2,u,0>, <u,6,0,7>
+ 2218637102U, // <0,u,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2724901370U, // <0,u,6,2>: Cost 3 vsldoi8 <6,7,0,u>, <6,2,7,3>
+ 2758990032U, // <0,u,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,6,3,7>
+ 2659691830U, // <0,u,6,4>: Cost 3 vsldoi4 <7,0,u,6>, RHS
+ 2659471458U, // <0,u,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 2724901688U, // <0,u,6,6>: Cost 3 vsldoi8 <6,7,0,u>, <6,6,6,6>
+ 1651159893U, // <0,u,6,7>: Cost 2 vsldoi8 <6,7,0,u>, <6,7,0,u>
+ 1651823526U, // <0,u,6,u>: Cost 2 vsldoi8 <6,u,0,u>, <6,u,0,u>
+ 2785827072U, // <0,u,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,0,1>
+ 2803964168U, // <0,u,7,1>: Cost 3 vsldoi12 <u,7,1,0>, <u,7,1,0>
+ 2727556249U, // <0,u,7,2>: Cost 3 vsldoi8 <7,2,0,u>, <7,2,0,u>
+ 2303656092U, // <0,u,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2785827112U, // <0,u,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,4,5>
+ 2785827122U, // <0,u,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,5,6>
+ 2730210781U, // <0,u,7,6>: Cost 3 vsldoi8 <7,6,0,u>, <7,6,0,u>
+ 2303659336U, // <0,u,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303656097U, // <0,u,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 202162278U, // <0,u,u,0>: Cost 1 vspltisw0 LHS
+ 72595246U, // <0,u,u,1>: Cost 1 vmrghw LHS, LHS
+ 1146337160U, // <0,u,u,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1146337343U, // <0,u,u,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 72595610U, // <0,u,u,5>: Cost 1 vmrghw LHS, RHS
+ 1146337488U, // <0,u,u,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221963080U, // <0,u,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2756853760U, // <1,0,0,0>: Cost 3 vsldoi12 <0,u,1,1>, <0,0,0,0>
+ 1677803530U, // <1,0,0,1>: Cost 2 vsldoi12 <0,0,1,1>, <0,0,1,1>
+ 3759497387U, // <1,0,0,2>: Cost 4 vsldoi8 <0,2,1,0>, <0,2,1,0>
+ 2686419196U, // <1,0,0,3>: Cost 3 vsldoi8 <0,3,1,0>, <0,3,1,0>
+ 2751766565U, // <1,0,0,4>: Cost 3 vsldoi12 <0,0,4,1>, <0,0,4,1>
+ 2687746462U, // <1,0,0,5>: Cost 3 vsldoi8 <0,5,1,0>, <0,5,1,0>
+ 3776086518U, // <1,0,0,6>: Cost 4 vsldoi8 <3,0,1,0>, <0,6,1,7>
+ 2689073728U, // <1,0,0,7>: Cost 3 vsldoi8 <0,7,1,0>, <0,7,1,0>
+ 1678319689U, // <1,0,0,u>: Cost 2 vsldoi12 <0,0,u,1>, <0,0,u,1>
+ 2287091712U, // <1,0,1,0>: Cost 3 vmrglw <0,u,1,1>, <0,0,0,0>
+ 1147568230U, // <1,0,1,1>: Cost 2 vmrghw <1,1,1,1>, LHS
+ 1683112038U, // <1,0,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 3294970108U, // <1,0,1,3>: Cost 4 vmrghw <1,1,0,0>, <0,3,1,0>
+ 2623892790U, // <1,0,1,4>: Cost 3 vsldoi4 <1,1,0,1>, RHS
+ 2647781007U, // <1,0,1,5>: Cost 3 vsldoi4 <5,1,0,1>, <5,1,0,1>
+ 2791948430U, // <1,0,1,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 3721524218U, // <1,0,1,7>: Cost 4 vsldoi4 <5,1,0,1>, <7,0,1,2>
+ 1683112092U, // <1,0,1,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2222112768U, // <1,0,2,0>: Cost 3 vmrghw <1,2,3,0>, <0,0,0,0>
+ 1148371046U, // <1,0,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 3356862524U, // <1,0,2,2>: Cost 4 vmrglw <0,2,1,2>, <2,u,0,2>
+ 2702345894U, // <1,0,2,3>: Cost 3 vsldoi8 <3,0,1,0>, <2,3,0,1>
+ 2222113106U, // <1,0,2,4>: Cost 3 vmrghw <1,2,3,0>, <0,4,1,5>
+ 2299709908U, // <1,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 3760162746U, // <1,0,2,6>: Cost 4 vsldoi8 <0,3,1,0>, <2,6,3,7>
+ 3369470584U, // <1,0,2,7>: Cost 4 vmrglw <2,3,1,2>, <3,6,0,7>
+ 1148371613U, // <1,0,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 2686421142U, // <1,0,3,0>: Cost 3 vsldoi8 <0,3,1,0>, <3,0,1,2>
+ 2283128486U, // <1,0,3,1>: Cost 3 vmrglw <0,2,1,3>, <2,3,0,1>
+ 3296305326U, // <1,0,3,2>: Cost 4 vmrghw <1,3,0,1>, <0,2,1,3>
+ 3760163199U, // <1,0,3,3>: Cost 4 vsldoi8 <0,3,1,0>, <3,3,0,1>
+ 3760163330U, // <1,0,3,4>: Cost 4 vsldoi8 <0,3,1,0>, <3,4,5,6>
+ 3779406377U, // <1,0,3,5>: Cost 4 vsldoi8 <3,5,1,0>, <3,5,1,0>
+ 3865690416U, // <1,0,3,6>: Cost 4 vsldoi12 <6,7,0,1>, <0,3,6,7>
+ 3366824568U, // <1,0,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,0,7>
+ 2707655452U, // <1,0,3,u>: Cost 3 vsldoi8 <3,u,1,0>, <3,u,1,0>
+ 2734861202U, // <1,0,4,0>: Cost 3 vsldoi8 <u,4,1,0>, <4,0,5,1>
+ 2756854098U, // <1,0,4,1>: Cost 3 vsldoi12 <0,u,1,1>, <0,4,1,5>
+ 3830595931U, // <1,0,4,2>: Cost 5 vsldoi12 <0,u,1,1>, <0,4,2,5>
+ 3296968960U, // <1,0,4,3>: Cost 4 vmrghw <1,4,0,1>, <0,3,1,4>
+ 3830595949U, // <1,0,4,4>: Cost 4 vsldoi12 <0,u,1,1>, <0,4,4,5>
+ 2686422326U, // <1,0,4,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 3297378806U, // <1,0,4,6>: Cost 5 vmrghw <1,4,5,6>, <0,6,1,7>
+ 3810594248U, // <1,0,4,7>: Cost 4 vsldoi8 <u,7,1,0>, <4,7,5,0>
+ 2686422569U, // <1,0,4,u>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2284470272U, // <1,0,5,0>: Cost 3 vmrglw <0,4,1,5>, <0,0,0,0>
+ 2284471974U, // <1,0,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,1>
+ 3809267435U, // <1,0,5,2>: Cost 4 vsldoi8 <u,5,1,0>, <5,2,1,3>
+ 3297968384U, // <1,0,5,3>: Cost 4 vmrghw <1,5,4,6>, <0,3,1,4>
+ 2284471977U, // <1,0,5,4>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,4>
+ 3721555603U, // <1,0,5,5>: Cost 4 vsldoi4 <5,1,0,5>, <5,1,0,5>
+ 3792679010U, // <1,0,5,6>: Cost 4 vsldoi8 <5,7,1,0>, <5,6,7,0>
+ 3792679037U, // <1,0,5,7>: Cost 4 vsldoi8 <5,7,1,0>, <5,7,1,0>
+ 2284471981U, // <1,0,5,u>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,u>
+ 3356893184U, // <1,0,6,0>: Cost 4 vmrglw <0,2,1,6>, <0,0,0,0>
+ 2224676966U, // <1,0,6,1>: Cost 3 vmrghw <1,6,1,7>, LHS
+ 3298295985U, // <1,0,6,2>: Cost 4 vmrghw <1,6,0,1>, <0,2,1,6>
+ 3298345212U, // <1,0,6,3>: Cost 4 vmrghw <1,6,0,7>, <0,3,1,0>
+ 2224972114U, // <1,0,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 3808604907U, // <1,0,6,5>: Cost 4 vsldoi8 <u,4,1,0>, <6,5,7,1>
+ 3799978808U, // <1,0,6,6>: Cost 4 vsldoi8 <7,0,1,0>, <6,6,6,6>
+ 2726237006U, // <1,0,6,7>: Cost 3 vsldoi8 <7,0,1,0>, <6,7,0,1>
+ 2224677522U, // <1,0,6,u>: Cost 3 vmrghw <1,6,1,7>, <0,u,1,1>
+ 2726237176U, // <1,0,7,0>: Cost 3 vsldoi8 <7,0,1,0>, <7,0,1,0>
+ 2285815462U, // <1,0,7,1>: Cost 3 vmrglw <0,6,1,7>, <2,3,0,1>
+ 3805951193U, // <1,0,7,2>: Cost 4 vsldoi8 <u,0,1,0>, <7,2,u,0>
+ 3807941859U, // <1,0,7,3>: Cost 4 vsldoi8 <u,3,1,0>, <7,3,0,1>
+ 3799979366U, // <1,0,7,4>: Cost 4 vsldoi8 <7,0,1,0>, <7,4,5,6>
+ 3803297165U, // <1,0,7,5>: Cost 4 vsldoi8 <7,5,1,0>, <7,5,1,0>
+ 3799979540U, // <1,0,7,6>: Cost 4 vsldoi8 <7,0,1,0>, <7,6,7,0>
+ 3799979628U, // <1,0,7,7>: Cost 4 vsldoi8 <7,0,1,0>, <7,7,7,7>
+ 2731546240U, // <1,0,7,u>: Cost 3 vsldoi8 <7,u,1,0>, <7,u,1,0>
+ 2284494848U, // <1,0,u,0>: Cost 3 vmrglw <0,4,1,u>, <0,0,0,0>
+ 1683112594U, // <1,0,u,1>: Cost 2 vsldoi12 <0,u,1,1>, <0,u,1,1>
+ 1683112605U, // <1,0,u,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2734200772U, // <1,0,u,3>: Cost 3 vsldoi8 <u,3,1,0>, <u,3,1,0>
+ 2757075629U, // <1,0,u,4>: Cost 3 vsldoi12 <0,u,4,1>, <0,u,4,1>
+ 2686425242U, // <1,0,u,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2791948430U, // <1,0,u,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 2736855304U, // <1,0,u,7>: Cost 3 vsldoi8 <u,7,1,0>, <u,7,1,0>
+ 1683112659U, // <1,0,u,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1610694666U, // <1,1,0,0>: Cost 2 vsldoi8 <0,0,1,1>, <0,0,1,1>
+ 1616003174U, // <1,1,0,1>: Cost 2 vsldoi8 <0,u,1,1>, LHS
+ 2283767958U, // <1,1,0,2>: Cost 3 vmrglw <0,3,1,0>, <3,0,1,2>
+ 3357507596U, // <1,1,0,3>: Cost 4 vmrglw <0,3,1,0>, <0,0,1,3>
+ 2689745234U, // <1,1,0,4>: Cost 3 vsldoi8 <0,u,1,1>, <0,4,1,5>
+ 3357507922U, // <1,1,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,1,5>
+ 3294397647U, // <1,1,0,6>: Cost 4 vmrghw <1,0,1,2>, <1,6,1,7>
+ 3373433334U, // <1,1,0,7>: Cost 4 vmrglw <3,0,1,0>, <0,6,1,7>
+ 1616003730U, // <1,1,0,u>: Cost 2 vsldoi8 <0,u,1,1>, <0,u,1,1>
+ 1550221414U, // <1,1,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,1,1>: Cost 1 vspltisw1 LHS
+ 2287093910U, // <1,1,1,2>: Cost 3 vmrglw <0,u,1,1>, <3,0,1,2>
+ 2287092615U, // <1,1,1,3>: Cost 3 vmrglw <0,u,1,1>, <1,2,1,3>
+ 1550224694U, // <1,1,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 2287092050U, // <1,1,1,5>: Cost 3 vmrglw <0,u,1,1>, <0,4,1,5>
+ 2689746127U, // <1,1,1,6>: Cost 3 vsldoi8 <0,u,1,1>, <1,6,1,7>
+ 2659800138U, // <1,1,1,7>: Cost 3 vsldoi4 <7,1,1,1>, <7,1,1,1>
+ 269271142U, // <1,1,1,u>: Cost 1 vspltisw1 LHS
+ 2222113516U, // <1,1,2,0>: Cost 3 vmrghw <1,2,3,0>, <1,0,2,1>
+ 2756854663U, // <1,1,2,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,2,1,3>
+ 1148371862U, // <1,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689746598U, // <1,1,2,3>: Cost 3 vsldoi8 <0,u,1,1>, <2,3,0,1>
+ 2618002742U, // <1,1,2,4>: Cost 3 vsldoi4 <0,1,1,2>, RHS
+ 2299707730U, // <1,1,2,5>: Cost 3 vmrglw <3,0,1,2>, <0,4,1,5>
+ 2689746874U, // <1,1,2,6>: Cost 3 vsldoi8 <0,u,1,1>, <2,6,3,7>
+ 3361506511U, // <1,1,2,7>: Cost 4 vmrglw <1,0,1,2>, <1,6,1,7>
+ 1148371862U, // <1,1,2,u>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689747094U, // <1,1,3,0>: Cost 3 vsldoi8 <0,u,1,1>, <3,0,1,2>
+ 2691074278U, // <1,1,3,1>: Cost 3 vsldoi8 <1,1,1,1>, <3,1,1,1>
+ 3356870806U, // <1,1,3,2>: Cost 4 vmrglw <0,2,1,3>, <3,0,1,2>
+ 2283126958U, // <1,1,3,3>: Cost 3 vmrglw <0,2,1,3>, <0,2,1,3>
+ 2689747458U, // <1,1,3,4>: Cost 3 vsldoi8 <0,u,1,1>, <3,4,5,6>
+ 3356868946U, // <1,1,3,5>: Cost 4 vmrglw <0,2,1,3>, <0,4,1,5>
+ 3811265144U, // <1,1,3,6>: Cost 4 vsldoi8 <u,u,1,1>, <3,6,0,7>
+ 3362841807U, // <1,1,3,7>: Cost 4 vmrglw <1,2,1,3>, <1,6,1,7>
+ 2689747742U, // <1,1,3,u>: Cost 3 vsldoi8 <0,u,1,1>, <3,u,1,2>
+ 2623987814U, // <1,1,4,0>: Cost 3 vsldoi4 <1,1,1,4>, LHS
+ 2758181931U, // <1,1,4,1>: Cost 3 vsldoi12 <1,1,1,1>, <1,4,1,5>
+ 2223408022U, // <1,1,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 3697731734U, // <1,1,4,3>: Cost 4 vsldoi4 <1,1,1,4>, <3,0,1,2>
+ 2283798784U, // <1,1,4,4>: Cost 3 vmrglw <0,3,1,4>, <0,3,1,4>
+ 1616006454U, // <1,1,4,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 3297379535U, // <1,1,4,6>: Cost 4 vmrghw <1,4,5,6>, <1,6,1,7>
+ 3373466102U, // <1,1,4,7>: Cost 4 vmrglw <3,0,1,4>, <0,6,1,7>
+ 1616006697U, // <1,1,4,u>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2760762479U, // <1,1,5,0>: Cost 3 vsldoi12 <1,5,0,1>, <1,5,0,1>
+ 2284470282U, // <1,1,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,1>
+ 2284472470U, // <1,1,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,1,2>
+ 3358212270U, // <1,1,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,1,3>
+ 2284470285U, // <1,1,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,4>
+ 1210728786U, // <1,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2737524834U, // <1,1,5,6>: Cost 3 vsldoi8 <u,u,1,1>, <5,6,7,0>
+ 3360867535U, // <1,1,5,7>: Cost 4 vmrglw <0,u,1,5>, <1,6,1,7>
+ 1210728786U, // <1,1,5,u>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 3697746022U, // <1,1,6,0>: Cost 4 vsldoi4 <1,1,1,6>, LHS
+ 2756854991U, // <1,1,6,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,6,1,7>
+ 2737525242U, // <1,1,6,2>: Cost 3 vsldoi8 <u,u,1,1>, <6,2,7,3>
+ 3839149281U, // <1,1,6,3>: Cost 4 vsldoi12 <2,3,0,1>, <1,6,3,7>
+ 3697749302U, // <1,1,6,4>: Cost 4 vsldoi4 <1,1,1,6>, RHS
+ 3356893522U, // <1,1,6,5>: Cost 4 vmrglw <0,2,1,6>, <0,4,1,5>
+ 2283151537U, // <1,1,6,6>: Cost 3 vmrglw <0,2,1,6>, <0,2,1,6>
+ 2791949566U, // <1,1,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <1,6,7,0>
+ 2792613127U, // <1,1,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <1,6,u,0>
+ 2737525754U, // <1,1,7,0>: Cost 3 vsldoi8 <u,u,1,1>, <7,0,1,2>
+ 2291786386U, // <1,1,7,1>: Cost 3 vmrglw <1,6,1,7>, <0,u,1,1>
+ 3365528292U, // <1,1,7,2>: Cost 4 vmrglw <1,6,1,7>, <1,0,1,2>
+ 3365528455U, // <1,1,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,1,3>
+ 2737526118U, // <1,1,7,4>: Cost 3 vsldoi8 <u,u,1,1>, <7,4,5,6>
+ 3365527890U, // <1,1,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,1,5>
+ 3365528377U, // <1,1,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,1,1,6>
+ 2291786959U, // <1,1,7,7>: Cost 3 vmrglw <1,6,1,7>, <1,6,1,7>
+ 2737526402U, // <1,1,7,u>: Cost 3 vsldoi8 <u,u,1,1>, <7,u,1,2>
+ 1550221414U, // <1,1,u,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,u,1>: Cost 1 vspltisw1 LHS
+ 1148371862U, // <1,1,u,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689750972U, // <1,1,u,3>: Cost 3 vsldoi8 <0,u,1,1>, <u,3,0,1>
+ 1550224694U, // <1,1,u,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1616009370U, // <1,1,u,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2689751248U, // <1,1,u,6>: Cost 3 vsldoi8 <0,u,1,1>, <u,6,3,7>
+ 2736863497U, // <1,1,u,7>: Cost 3 vsldoi8 <u,7,1,1>, <u,7,1,1>
+ 269271142U, // <1,1,u,u>: Cost 1 vspltisw1 LHS
+ 2702360576U, // <1,2,0,0>: Cost 3 vsldoi8 <3,0,1,2>, <0,0,0,0>
+ 1628618854U, // <1,2,0,1>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2685771949U, // <1,2,0,2>: Cost 3 vsldoi8 <0,2,1,2>, <0,2,1,2>
+ 2283765862U, // <1,2,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 2702360914U, // <1,2,0,4>: Cost 3 vsldoi8 <3,0,1,2>, <0,4,1,5>
+ 3788046813U, // <1,2,0,5>: Cost 4 vsldoi8 <5,0,1,2>, <0,5,u,0>
+ 2688426481U, // <1,2,0,6>: Cost 3 vsldoi8 <0,6,1,2>, <0,6,1,2>
+ 2726249024U, // <1,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1628619421U, // <1,2,0,u>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2690417380U, // <1,2,1,0>: Cost 3 vsldoi8 <1,0,1,2>, <1,0,1,2>
+ 2702361396U, // <1,2,1,1>: Cost 3 vsldoi8 <3,0,1,2>, <1,1,1,1>
+ 2287093352U, // <1,2,1,2>: Cost 3 vmrglw <0,u,1,1>, <2,2,2,2>
+ 1213349990U, // <1,2,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 3764159522U, // <1,2,1,4>: Cost 4 vsldoi8 <1,0,1,2>, <1,4,0,5>
+ 3295053672U, // <1,2,1,5>: Cost 4 vmrghw <1,1,1,1>, <2,5,3,6>
+ 2221311930U, // <1,2,1,6>: Cost 3 vmrghw <1,1,1,1>, <2,6,3,7>
+ 3799991593U, // <1,2,1,7>: Cost 4 vsldoi8 <7,0,1,2>, <1,7,2,7>
+ 1213349995U, // <1,2,1,u>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 2624045158U, // <1,2,2,0>: Cost 3 vsldoi4 <1,1,2,2>, LHS
+ 2702362144U, // <1,2,2,1>: Cost 3 vsldoi8 <3,0,1,2>, <2,1,3,2>
+ 2283120232U, // <1,2,2,2>: Cost 3 vmrglw <0,2,1,2>, <2,2,2,2>
+ 1225965670U, // <1,2,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2624048438U, // <1,2,2,4>: Cost 3 vsldoi4 <1,1,2,2>, RHS
+ 3356860763U, // <1,2,2,5>: Cost 4 vmrglw <0,2,1,2>, <0,4,2,5>
+ 2222114746U, // <1,2,2,6>: Cost 3 vmrghw <1,2,3,0>, <2,6,3,7>
+ 2299708632U, // <1,2,2,7>: Cost 3 vmrglw <3,0,1,2>, <1,6,2,7>
+ 1225965675U, // <1,2,2,u>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 470597734U, // <1,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544340276U, // <1,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544341096U, // <1,2,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544341916U, // <1,2,3,3>: Cost 2 vsldoi4 LHS, <3,3,3,3>
+ 470601014U, // <1,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592119300U, // <1,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592119802U, // <1,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592120314U, // <1,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470603566U, // <1,2,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708335471U, // <1,2,4,0>: Cost 3 vsldoi8 <4,0,1,2>, <4,0,1,2>
+ 3838043908U, // <1,2,4,1>: Cost 4 vsldoi12 <2,1,3,1>, <2,4,1,5>
+ 3357541992U, // <1,2,4,2>: Cost 4 vmrglw <0,3,1,4>, <2,2,2,2>
+ 2283798630U, // <1,2,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2726251728U, // <1,2,4,4>: Cost 3 vsldoi8 <7,0,1,2>, <4,4,4,4>
+ 1628622134U, // <1,2,4,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 3297077178U, // <1,2,4,6>: Cost 4 vmrghw <1,4,1,5>, <2,6,3,7>
+ 2726251976U, // <1,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1628622377U, // <1,2,4,u>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 2714308168U, // <1,2,5,0>: Cost 3 vsldoi8 <5,0,1,2>, <5,0,1,2>
+ 3297633827U, // <1,2,5,1>: Cost 4 vmrghw <1,5,0,1>, <2,1,3,5>
+ 2284471912U, // <1,2,5,2>: Cost 3 vmrglw <0,4,1,5>, <2,2,2,2>
+ 1210728550U, // <1,2,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 3776106420U, // <1,2,5,4>: Cost 4 vsldoi8 <3,0,1,2>, <5,4,5,6>
+ 2726252548U, // <1,2,5,5>: Cost 3 vsldoi8 <7,0,1,2>, <5,5,5,5>
+ 2726252642U, // <1,2,5,6>: Cost 3 vsldoi8 <7,0,1,2>, <5,6,7,0>
+ 3799994538U, // <1,2,5,7>: Cost 4 vsldoi8 <7,0,1,2>, <5,7,6,0>
+ 1210728555U, // <1,2,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720280865U, // <1,2,6,0>: Cost 3 vsldoi8 <6,0,1,2>, <6,0,1,2>
+ 2702365096U, // <1,2,6,1>: Cost 3 vsldoi8 <3,0,1,2>, <6,1,7,2>
+ 2726253050U, // <1,2,6,2>: Cost 3 vsldoi8 <7,0,1,2>, <6,2,7,3>
+ 2283151462U, // <1,2,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 3697823030U, // <1,2,6,4>: Cost 4 vsldoi4 <1,1,2,6>, RHS
+ 3298715497U, // <1,2,6,5>: Cost 4 vmrghw <1,6,5,7>, <2,5,3,7>
+ 2726253368U, // <1,2,6,6>: Cost 3 vsldoi8 <7,0,1,2>, <6,6,6,6>
+ 2724926296U, // <1,2,6,7>: Cost 3 vsldoi8 <6,7,1,2>, <6,7,1,2>
+ 2283151467U, // <1,2,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652511738U, // <1,2,7,0>: Cost 2 vsldoi8 <7,0,1,2>, <7,0,1,2>
+ 3371500916U, // <1,2,7,1>: Cost 4 vmrglw <2,6,1,7>, <1,u,2,1>
+ 3365529192U, // <1,2,7,2>: Cost 4 vmrglw <1,6,1,7>, <2,2,2,2>
+ 2291785830U, // <1,2,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2726253926U, // <1,2,7,4>: Cost 3 vsldoi8 <7,0,1,2>, <7,4,5,6>
+ 3788051845U, // <1,2,7,5>: Cost 4 vsldoi8 <5,0,1,2>, <7,5,0,1>
+ 3794023894U, // <1,2,7,6>: Cost 4 vsldoi8 <6,0,1,2>, <7,6,0,1>
+ 2726254119U, // <1,2,7,7>: Cost 3 vsldoi8 <7,0,1,2>, <7,7,0,1>
+ 1657820802U, // <1,2,7,u>: Cost 2 vsldoi8 <7,u,1,2>, <7,u,1,2>
+ 470638699U, // <1,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544381236U, // <1,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544382056U, // <1,2,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544382614U, // <1,2,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 470641974U, // <1,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1628625050U, // <1,2,u,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 1592160762U, // <1,2,u,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592161274U, // <1,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470644526U, // <1,2,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2769389708U, // <1,3,0,0>: Cost 3 vsldoi12 <3,0,0,1>, <3,0,0,1>
+ 2685780070U, // <1,3,0,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2685780142U, // <1,3,0,2>: Cost 3 vsldoi8 <0,2,1,3>, <0,2,1,3>
+ 2686443775U, // <1,3,0,3>: Cost 3 vsldoi8 <0,3,1,3>, <0,3,1,3>
+ 2769684656U, // <1,3,0,4>: Cost 3 vsldoi12 <3,0,4,1>, <3,0,4,1>
+ 3357507940U, // <1,3,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,3,5>
+ 3759522294U, // <1,3,0,6>: Cost 4 vsldoi8 <0,2,1,3>, <0,6,1,7>
+ 3357509562U, // <1,3,0,7>: Cost 4 vmrglw <0,3,1,0>, <2,6,3,7>
+ 2685780637U, // <1,3,0,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2287092630U, // <1,3,1,0>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,0>
+ 2221312230U, // <1,3,1,1>: Cost 3 vmrghw <1,1,1,1>, <3,1,1,1>
+ 2691752839U, // <1,3,1,2>: Cost 3 vsldoi8 <1,2,1,3>, <1,2,1,3>
+ 2287093362U, // <1,3,1,3>: Cost 3 vmrglw <0,u,1,1>, <2,2,3,3>
+ 2287092634U, // <1,3,1,4>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,4>
+ 3360835107U, // <1,3,1,5>: Cost 4 vmrglw <0,u,1,1>, <2,1,3,5>
+ 3759523041U, // <1,3,1,6>: Cost 4 vsldoi8 <0,2,1,3>, <1,6,3,7>
+ 2287093690U, // <1,3,1,7>: Cost 3 vmrglw <0,u,1,1>, <2,6,3,7>
+ 2287092638U, // <1,3,1,u>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,u>
+ 2222114966U, // <1,3,2,0>: Cost 3 vmrghw <1,2,3,0>, <3,0,1,2>
+ 2222115057U, // <1,3,2,1>: Cost 3 vmrghw <1,2,3,0>, <3,1,2,3>
+ 2630092320U, // <1,3,2,2>: Cost 3 vsldoi4 <2,1,3,2>, <2,1,3,2>
+ 2685781670U, // <1,3,2,3>: Cost 3 vsldoi8 <0,2,1,3>, <2,3,0,1>
+ 2222115330U, // <1,3,2,4>: Cost 3 vmrghw <1,2,3,0>, <3,4,5,6>
+ 3373449572U, // <1,3,2,5>: Cost 4 vmrglw <3,0,1,2>, <0,4,3,5>
+ 2222115448U, // <1,3,2,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2299709370U, // <1,3,2,7>: Cost 3 vmrglw <3,0,1,2>, <2,6,3,7>
+ 2222115614U, // <1,3,2,u>: Cost 3 vmrghw <1,2,3,0>, <3,u,1,2>
+ 2771380607U, // <1,3,3,0>: Cost 3 vsldoi12 <3,3,0,1>, <3,3,0,1>
+ 3356874468U, // <1,3,3,1>: Cost 4 vmrglw <0,2,1,3>, <u,0,3,1>
+ 3759524168U, // <1,3,3,2>: Cost 4 vsldoi8 <0,2,1,3>, <3,2,3,0>
+ 2283792796U, // <1,3,3,3>: Cost 3 vmrglw <0,3,1,3>, <3,3,3,3>
+ 3356869530U, // <1,3,3,4>: Cost 4 vmrglw <0,2,1,3>, <1,2,3,4>
+ 3721760428U, // <1,3,3,5>: Cost 4 vsldoi4 <5,1,3,3>, <5,1,3,3>
+ 3296496248U, // <1,3,3,6>: Cost 4 vmrghw <1,3,2,6>, <3,6,0,7>
+ 3356870586U, // <1,3,3,7>: Cost 4 vmrglw <0,2,1,3>, <2,6,3,7>
+ 2771970503U, // <1,3,3,u>: Cost 3 vsldoi12 <3,3,u,1>, <3,3,u,1>
+ 2772044240U, // <1,3,4,0>: Cost 3 vsldoi12 <3,4,0,1>, <3,4,0,1>
+ 3362186135U, // <1,3,4,1>: Cost 4 vmrglw <1,1,1,4>, <1,2,3,1>
+ 3297151280U, // <1,3,4,2>: Cost 4 vmrghw <1,4,2,5>, <3,2,0,3>
+ 3357542002U, // <1,3,4,3>: Cost 4 vmrglw <0,3,1,4>, <2,2,3,3>
+ 3357540626U, // <1,3,4,4>: Cost 4 vmrglw <0,3,1,4>, <0,3,3,4>
+ 2685783350U, // <1,3,4,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 3357546622U, // <1,3,4,6>: Cost 4 vmrglw <0,3,1,4>, <u,5,3,6>
+ 3357542330U, // <1,3,4,7>: Cost 4 vmrglw <0,3,1,4>, <2,6,3,7>
+ 2685783593U, // <1,3,4,u>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2284471190U, // <1,3,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,0>
+ 3358213015U, // <1,3,5,1>: Cost 4 vmrglw <0,4,1,5>, <1,2,3,1>
+ 2630116899U, // <1,3,5,2>: Cost 3 vsldoi4 <2,1,3,5>, <2,1,3,5>
+ 2284471922U, // <1,3,5,3>: Cost 3 vmrglw <0,4,1,5>, <2,2,3,3>
+ 2284471194U, // <1,3,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,4>
+ 2284471843U, // <1,3,5,5>: Cost 3 vmrglw <0,4,1,5>, <2,1,3,5>
+ 3358218366U, // <1,3,5,6>: Cost 4 vmrglw <0,4,1,5>, <u,5,3,6>
+ 2284472250U, // <1,3,5,7>: Cost 3 vmrglw <0,4,1,5>, <2,6,3,7>
+ 2284471198U, // <1,3,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,u>
+ 2224752790U, // <1,3,6,0>: Cost 3 vmrghw <1,6,2,7>, <3,0,1,2>
+ 3832736385U, // <1,3,6,1>: Cost 4 vsldoi12 <1,2,3,1>, <3,6,1,7>
+ 3703866916U, // <1,3,6,2>: Cost 4 vsldoi4 <2,1,3,6>, <2,1,3,6>
+ 3356894834U, // <1,3,6,3>: Cost 4 vmrglw <0,2,1,6>, <2,2,3,3>
+ 3356894106U, // <1,3,6,4>: Cost 4 vmrglw <0,2,1,6>, <1,2,3,4>
+ 3356894755U, // <1,3,6,5>: Cost 5 vmrglw <0,2,1,6>, <2,1,3,5>
+ 3356899130U, // <1,3,6,6>: Cost 4 vmrglw <0,2,1,6>, <u,1,3,6>
+ 2283153338U, // <1,3,6,7>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2283153338U, // <1,3,6,u>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2774035139U, // <1,3,7,0>: Cost 3 vsldoi12 <3,7,0,1>, <3,7,0,1>
+ 3703874767U, // <1,3,7,1>: Cost 4 vsldoi4 <2,1,3,7>, <1,6,1,7>
+ 3703875109U, // <1,3,7,2>: Cost 4 vsldoi4 <2,1,3,7>, <2,1,3,7>
+ 3365529202U, // <1,3,7,3>: Cost 4 vmrglw <1,6,1,7>, <2,2,3,3>
+ 3365528474U, // <1,3,7,4>: Cost 4 vmrglw <1,6,1,7>, <1,2,3,4>
+ 3789387159U, // <1,3,7,5>: Cost 4 vsldoi8 <5,2,1,3>, <7,5,2,1>
+ 3865692927U, // <1,3,7,6>: Cost 4 vsldoi12 <6,7,0,1>, <3,7,6,7>
+ 3363538874U, // <1,3,7,7>: Cost 4 vmrglw <1,3,1,7>, <2,6,3,7>
+ 2774625035U, // <1,3,7,u>: Cost 3 vsldoi12 <3,7,u,1>, <3,7,u,1>
+ 2284495766U, // <1,3,u,0>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,0>
+ 2685785902U, // <1,3,u,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2630141478U, // <1,3,u,2>: Cost 3 vsldoi4 <2,1,3,u>, <2,1,3,u>
+ 2283169880U, // <1,3,u,3>: Cost 3 vmrglw <0,2,1,u>, <2,u,3,3>
+ 2284495770U, // <1,3,u,4>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,4>
+ 2685786266U, // <1,3,u,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2222115448U, // <1,3,u,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2284496826U, // <1,3,u,7>: Cost 3 vmrglw <0,4,1,u>, <2,6,3,7>
+ 2685786469U, // <1,3,u,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2684461069U, // <1,4,0,0>: Cost 3 vsldoi8 <0,0,1,4>, <0,0,1,4>
+ 2686451814U, // <1,4,0,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 3759530159U, // <1,4,0,2>: Cost 4 vsldoi8 <0,2,1,4>, <0,2,1,4>
+ 2686451968U, // <1,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2684461394U, // <1,4,0,4>: Cost 3 vsldoi8 <0,0,1,4>, <0,4,1,5>
+ 1701989266U, // <1,4,0,5>: Cost 2 vsldoi12 <4,0,5,1>, <4,0,5,1>
+ 3776119286U, // <1,4,0,6>: Cost 4 vsldoi8 <3,0,1,4>, <0,6,1,7>
+ 2689106500U, // <1,4,0,7>: Cost 3 vsldoi8 <0,7,1,4>, <0,7,1,4>
+ 1702210477U, // <1,4,0,u>: Cost 2 vsldoi12 <4,0,u,1>, <4,0,u,1>
+ 2221312914U, // <1,4,1,0>: Cost 3 vmrghw <1,1,1,1>, <4,0,5,1>
+ 2691097399U, // <1,4,1,1>: Cost 3 vsldoi8 <1,1,1,4>, <1,1,1,4>
+ 3760194454U, // <1,4,1,2>: Cost 4 vsldoi8 <0,3,1,4>, <1,2,3,0>
+ 3766166489U, // <1,4,1,3>: Cost 4 vsldoi8 <1,3,1,4>, <1,3,1,4>
+ 2334870736U, // <1,4,1,4>: Cost 3 vmrglw <u,u,1,1>, <4,4,4,4>
+ 1147571510U, // <1,4,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 3760194794U, // <1,4,1,6>: Cost 4 vsldoi8 <0,3,1,4>, <1,6,4,7>
+ 3867315188U, // <1,4,1,7>: Cost 4 vsldoi12 <7,0,4,1>, <4,1,7,0>
+ 1147571753U, // <1,4,1,u>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2222115730U, // <1,4,2,0>: Cost 3 vmrghw <1,2,3,0>, <4,0,5,1>
+ 2222115812U, // <1,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 3760195176U, // <1,4,2,2>: Cost 4 vsldoi8 <0,3,1,4>, <2,2,2,2>
+ 2702378662U, // <1,4,2,3>: Cost 3 vsldoi8 <3,0,1,4>, <2,3,0,1>
+ 2323598544U, // <1,4,2,4>: Cost 3 vmrglw <7,0,1,2>, <4,4,4,4>
+ 1148374326U, // <1,4,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 3760195514U, // <1,4,2,6>: Cost 4 vsldoi8 <0,3,1,4>, <2,6,3,7>
+ 3373451932U, // <1,4,2,7>: Cost 4 vmrglw <3,0,1,2>, <3,6,4,7>
+ 1148374569U, // <1,4,2,u>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2702379160U, // <1,4,3,0>: Cost 3 vsldoi8 <3,0,1,4>, <3,0,1,4>
+ 3760195840U, // <1,4,3,1>: Cost 4 vsldoi8 <0,3,1,4>, <3,1,4,0>
+ 3776121160U, // <1,4,3,2>: Cost 4 vsldoi8 <3,0,1,4>, <3,2,3,0>
+ 3760195996U, // <1,4,3,3>: Cost 4 vsldoi8 <0,3,1,4>, <3,3,3,3>
+ 2686454274U, // <1,4,3,4>: Cost 3 vsldoi8 <0,3,1,4>, <3,4,5,6>
+ 3356870350U, // <1,4,3,5>: Cost 4 vmrglw <0,2,1,3>, <2,3,4,5>
+ 3800009392U, // <1,4,3,6>: Cost 4 vsldoi8 <7,0,1,4>, <3,6,7,0>
+ 3366824604U, // <1,4,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,4,7>
+ 2707688224U, // <1,4,3,u>: Cost 3 vsldoi8 <3,u,1,4>, <3,u,1,4>
+ 2775731368U, // <1,4,4,0>: Cost 3 vsldoi12 <4,0,5,1>, <4,4,0,0>
+ 3830820018U, // <1,4,4,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,4,1,1>
+ 3691980454U, // <1,4,4,2>: Cost 4 vsldoi4 <0,1,4,4>, <2,3,0,1>
+ 3357541282U, // <1,4,4,3>: Cost 4 vmrglw <0,3,1,4>, <1,2,4,3>
+ 2781039824U, // <1,4,4,4>: Cost 3 vsldoi12 <4,u,5,1>, <4,4,4,4>
+ 2686455094U, // <1,4,4,5>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 3357541528U, // <1,4,4,6>: Cost 4 vmrglw <0,3,1,4>, <1,5,4,6>
+ 3810627020U, // <1,4,4,7>: Cost 4 vsldoi8 <u,7,1,4>, <4,7,5,4>
+ 2686455337U, // <1,4,4,u>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 2624217190U, // <1,4,5,0>: Cost 3 vsldoi4 <1,1,4,5>, LHS
+ 2284470309U, // <1,4,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,1>
+ 2618246822U, // <1,4,5,2>: Cost 3 vsldoi4 <0,1,4,5>, <2,3,0,1>
+ 3358212297U, // <1,4,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,4,3>
+ 2284470312U, // <1,4,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,4>
+ 2284470637U, // <1,4,5,5>: Cost 3 vmrglw <0,4,1,5>, <0,4,4,5>
+ 1683115318U, // <1,4,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3721851898U, // <1,4,5,7>: Cost 4 vsldoi4 <5,1,4,5>, <7,0,1,2>
+ 1683115336U, // <1,4,5,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3794039075U, // <1,4,6,0>: Cost 4 vsldoi8 <6,0,1,4>, <6,0,1,4>
+ 3830820186U, // <1,4,6,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,6,1,7>
+ 3800011258U, // <1,4,6,2>: Cost 4 vsldoi8 <7,0,1,4>, <6,2,7,3>
+ 3807973938U, // <1,4,6,3>: Cost 4 vsldoi8 <u,3,1,4>, <6,3,4,5>
+ 3298716880U, // <1,4,6,4>: Cost 4 vmrghw <1,6,5,7>, <4,4,4,4>
+ 2224680246U, // <1,4,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 3800011576U, // <1,4,6,6>: Cost 4 vsldoi8 <7,0,1,4>, <6,6,6,6>
+ 2726269774U, // <1,4,6,7>: Cost 3 vsldoi8 <7,0,1,4>, <6,7,0,1>
+ 2224680489U, // <1,4,6,u>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726269948U, // <1,4,7,0>: Cost 3 vsldoi8 <7,0,1,4>, <7,0,1,4>
+ 3383444141U, // <1,4,7,1>: Cost 4 vmrglw <4,6,1,7>, <0,u,4,1>
+ 3805983961U, // <1,4,7,2>: Cost 4 vsldoi8 <u,0,1,4>, <7,2,u,0>
+ 3807974667U, // <1,4,7,3>: Cost 4 vsldoi8 <u,3,1,4>, <7,3,4,5>
+ 2736887142U, // <1,4,7,4>: Cost 3 vsldoi8 <u,7,1,4>, <7,4,5,6>
+ 3365528403U, // <1,4,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,1,4,5>
+ 3800012308U, // <1,4,7,6>: Cost 4 vsldoi8 <7,0,1,4>, <7,6,7,0>
+ 3800012396U, // <1,4,7,7>: Cost 4 vsldoi8 <7,0,1,4>, <7,7,7,7>
+ 2731579012U, // <1,4,7,u>: Cost 3 vsldoi8 <7,u,1,4>, <7,u,1,4>
+ 2624241766U, // <1,4,u,0>: Cost 3 vsldoi4 <1,1,4,u>, LHS
+ 2686457646U, // <1,4,u,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 2618271398U, // <1,4,u,2>: Cost 3 vsldoi4 <0,1,4,u>, <2,3,0,1>
+ 2734233544U, // <1,4,u,3>: Cost 3 vsldoi8 <u,3,1,4>, <u,3,1,4>
+ 2689775679U, // <1,4,u,4>: Cost 3 vsldoi8 <0,u,1,4>, <u,4,5,6>
+ 1152355638U, // <1,4,u,5>: Cost 2 vmrghw <1,u,3,0>, RHS
+ 1683115561U, // <1,4,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2736888076U, // <1,4,u,7>: Cost 3 vsldoi8 <u,7,1,4>, <u,7,1,4>
+ 1683115579U, // <1,4,u,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2687123456U, // <1,5,0,0>: Cost 3 vsldoi8 <0,4,1,5>, <0,0,0,0>
+ 1613381734U, // <1,5,0,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759538352U, // <1,5,0,2>: Cost 4 vsldoi8 <0,2,1,5>, <0,2,1,5>
+ 3760865532U, // <1,5,0,3>: Cost 4 vsldoi8 <0,4,1,5>, <0,3,1,0>
+ 1613381970U, // <1,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2687787427U, // <1,5,0,5>: Cost 3 vsldoi8 <0,5,1,5>, <0,5,1,5>
+ 2781777524U, // <1,5,0,6>: Cost 3 vsldoi12 <5,0,6,1>, <5,0,6,1>
+ 3733828717U, // <1,5,0,7>: Cost 4 vsldoi4 <7,1,5,0>, <7,1,5,0>
+ 1613382301U, // <1,5,0,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2781040271U, // <1,5,1,0>: Cost 3 vsldoi12 <4,u,5,1>, <5,1,0,1>
+ 2687124276U, // <1,5,1,1>: Cost 3 vsldoi8 <0,4,1,5>, <1,1,1,1>
+ 2687124374U, // <1,5,1,2>: Cost 3 vsldoi8 <0,4,1,5>, <1,2,3,0>
+ 3760866297U, // <1,5,1,3>: Cost 4 vsldoi8 <0,4,1,5>, <1,3,5,0>
+ 2693096491U, // <1,5,1,4>: Cost 3 vsldoi8 <1,4,1,5>, <1,4,1,5>
+ 2687124591U, // <1,5,1,5>: Cost 3 vsldoi8 <0,4,1,5>, <1,5,0,1>
+ 2687124723U, // <1,5,1,6>: Cost 3 vsldoi8 <0,4,1,5>, <1,6,5,7>
+ 3360834803U, // <1,5,1,7>: Cost 4 vmrglw <0,u,1,1>, <1,6,5,7>
+ 2687124860U, // <1,5,1,u>: Cost 3 vsldoi8 <0,4,1,5>, <1,u,3,0>
+ 2323598792U, // <1,5,2,0>: Cost 3 vmrglw <7,0,1,2>, <4,7,5,0>
+ 2687125027U, // <1,5,2,1>: Cost 3 vsldoi8 <0,4,1,5>, <2,1,3,5>
+ 2687125096U, // <1,5,2,2>: Cost 3 vsldoi8 <0,4,1,5>, <2,2,2,2>
+ 2687125158U, // <1,5,2,3>: Cost 3 vsldoi8 <0,4,1,5>, <2,3,0,1>
+ 2642185188U, // <1,5,2,4>: Cost 3 vsldoi4 <4,1,5,2>, <4,1,5,2>
+ 2323598554U, // <1,5,2,5>: Cost 3 vmrglw <7,0,1,2>, <4,4,5,5>
+ 2687125434U, // <1,5,2,6>: Cost 3 vsldoi8 <0,4,1,5>, <2,6,3,7>
+ 3373450483U, // <1,5,2,7>: Cost 4 vmrglw <3,0,1,2>, <1,6,5,7>
+ 2687125563U, // <1,5,2,u>: Cost 3 vsldoi8 <0,4,1,5>, <2,u,0,1>
+ 2687125654U, // <1,5,3,0>: Cost 3 vsldoi8 <0,4,1,5>, <3,0,1,2>
+ 2312990234U, // <1,5,3,1>: Cost 3 vmrglw <5,2,1,3>, <4,u,5,1>
+ 3760867649U, // <1,5,3,2>: Cost 4 vsldoi8 <0,4,1,5>, <3,2,2,2>
+ 2687125916U, // <1,5,3,3>: Cost 3 vsldoi8 <0,4,1,5>, <3,3,3,3>
+ 2687126018U, // <1,5,3,4>: Cost 3 vsldoi8 <0,4,1,5>, <3,4,5,6>
+ 3386731738U, // <1,5,3,5>: Cost 4 vmrglw <5,2,1,3>, <4,4,5,5>
+ 3356871170U, // <1,5,3,6>: Cost 4 vmrglw <0,2,1,3>, <3,4,5,6>
+ 3808643779U, // <1,5,3,7>: Cost 4 vsldoi8 <u,4,1,5>, <3,7,0,1>
+ 2687126302U, // <1,5,3,u>: Cost 3 vsldoi8 <0,4,1,5>, <3,u,1,2>
+ 2642198630U, // <1,5,4,0>: Cost 3 vsldoi4 <4,1,5,4>, LHS
+ 2687126498U, // <1,5,4,1>: Cost 3 vsldoi8 <0,4,1,5>, <4,1,5,0>
+ 3715941923U, // <1,5,4,2>: Cost 4 vsldoi4 <4,1,5,4>, <2,1,3,5>
+ 3709970701U, // <1,5,4,3>: Cost 4 vsldoi4 <3,1,5,4>, <3,1,5,4>
+ 2687126736U, // <1,5,4,4>: Cost 3 vsldoi8 <0,4,1,5>, <4,4,4,4>
+ 1613385014U, // <1,5,4,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2283801090U, // <1,5,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 3733861489U, // <1,5,4,7>: Cost 4 vsldoi4 <7,1,5,4>, <7,1,5,4>
+ 1613385257U, // <1,5,4,u>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2624290918U, // <1,5,5,0>: Cost 3 vsldoi4 <1,1,5,5>, LHS
+ 2624291676U, // <1,5,5,1>: Cost 3 vsldoi4 <1,1,5,5>, <1,1,5,5>
+ 3698034211U, // <1,5,5,2>: Cost 4 vsldoi4 <1,1,5,5>, <2,1,3,5>
+ 2284471211U, // <1,5,5,3>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,3>
+ 2624294198U, // <1,5,5,4>: Cost 3 vsldoi4 <1,1,5,5>, RHS
+ 2284471132U, // <1,5,5,5>: Cost 3 vmrglw <0,4,1,5>, <1,1,5,5>
+ 2284472834U, // <1,5,5,6>: Cost 3 vmrglw <0,4,1,5>, <3,4,5,6>
+ 2284471539U, // <1,5,5,7>: Cost 3 vmrglw <0,4,1,5>, <1,6,5,7>
+ 2284471216U, // <1,5,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,u>
+ 2785316900U, // <1,5,6,0>: Cost 3 vsldoi12 <5,6,0,1>, <5,6,0,1>
+ 2781040691U, // <1,5,6,1>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,1,7>
+ 2734903802U, // <1,5,6,2>: Cost 3 vsldoi8 <u,4,1,5>, <6,2,7,3>
+ 3848736834U, // <1,5,6,3>: Cost 4 vsldoi12 <3,u,4,1>, <5,6,3,4>
+ 3298717620U, // <1,5,6,4>: Cost 4 vmrghw <1,6,5,7>, <5,4,5,6>
+ 3298717700U, // <1,5,6,5>: Cost 4 vmrghw <1,6,5,7>, <5,5,5,5>
+ 2734904120U, // <1,5,6,6>: Cost 3 vsldoi8 <u,4,1,5>, <6,6,6,6>
+ 2781040738U, // <1,5,6,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,7,0>
+ 2781040747U, // <1,5,6,u>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,u,0>
+ 2734904314U, // <1,5,7,0>: Cost 3 vsldoi8 <u,4,1,5>, <7,0,1,2>
+ 2315677210U, // <1,5,7,1>: Cost 3 vmrglw <5,6,1,7>, <4,u,5,1>
+ 3808646292U, // <1,5,7,2>: Cost 4 vsldoi8 <u,4,1,5>, <7,2,0,3>
+ 3808646371U, // <1,5,7,3>: Cost 4 vsldoi8 <u,4,1,5>, <7,3,0,1>
+ 2734904678U, // <1,5,7,4>: Cost 3 vsldoi8 <u,4,1,5>, <7,4,5,6>
+ 3389418714U, // <1,5,7,5>: Cost 4 vmrglw <5,6,1,7>, <4,4,5,5>
+ 3365528656U, // <1,5,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,4,5,6>
+ 2734904940U, // <1,5,7,7>: Cost 3 vsldoi8 <u,4,1,5>, <7,7,7,7>
+ 2734904962U, // <1,5,7,u>: Cost 3 vsldoi8 <u,4,1,5>, <7,u,1,2>
+ 2687129299U, // <1,5,u,0>: Cost 3 vsldoi8 <0,4,1,5>, <u,0,1,2>
+ 1613387566U, // <1,5,u,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2687129480U, // <1,5,u,2>: Cost 3 vsldoi8 <0,4,1,5>, <u,2,3,3>
+ 2687129532U, // <1,5,u,3>: Cost 3 vsldoi8 <0,4,1,5>, <u,3,0,1>
+ 1661163546U, // <1,5,u,4>: Cost 2 vsldoi8 <u,4,1,5>, <u,4,1,5>
+ 1613387930U, // <1,5,u,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2687129808U, // <1,5,u,6>: Cost 3 vsldoi8 <0,4,1,5>, <u,6,3,7>
+ 2781040900U, // <1,5,u,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,u,7,0>
+ 1613388133U, // <1,5,u,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759546368U, // <1,6,0,0>: Cost 4 vsldoi8 <0,2,1,6>, <0,0,0,0>
+ 2685804646U, // <1,6,0,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2685804721U, // <1,6,0,2>: Cost 3 vsldoi8 <0,2,1,6>, <0,2,1,6>
+ 3861270834U, // <1,6,0,3>: Cost 4 vsldoi12 <6,0,3,1>, <6,0,3,1>
+ 3759546706U, // <1,6,0,4>: Cost 4 vsldoi8 <0,2,1,6>, <0,4,1,5>
+ 2687795620U, // <1,6,0,5>: Cost 3 vsldoi8 <0,5,1,6>, <0,5,1,6>
+ 2688459253U, // <1,6,0,6>: Cost 3 vsldoi8 <0,6,1,6>, <0,6,1,6>
+ 2283769142U, // <1,6,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 2685805213U, // <1,6,0,u>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 3698073702U, // <1,6,1,0>: Cost 4 vsldoi4 <1,1,6,1>, LHS
+ 3759547188U, // <1,6,1,1>: Cost 4 vsldoi8 <0,2,1,6>, <1,1,1,1>
+ 2221314554U, // <1,6,1,2>: Cost 3 vmrghw <1,1,1,1>, <6,2,7,3>
+ 3759547401U, // <1,6,1,3>: Cost 4 vsldoi8 <0,2,1,6>, <1,3,6,7>
+ 3698076982U, // <1,6,1,4>: Cost 4 vsldoi4 <1,1,6,1>, RHS
+ 3767510141U, // <1,6,1,5>: Cost 4 vsldoi8 <1,5,1,6>, <1,5,1,6>
+ 2334872376U, // <1,6,1,6>: Cost 3 vmrglw <u,u,1,1>, <6,6,6,6>
+ 1213353270U, // <1,6,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 1213353271U, // <1,6,1,u>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 3704053862U, // <1,6,2,0>: Cost 4 vsldoi4 <2,1,6,2>, LHS
+ 3759547961U, // <1,6,2,1>: Cost 4 vsldoi8 <0,2,1,6>, <2,1,6,0>
+ 2222117370U, // <1,6,2,2>: Cost 3 vmrghw <1,2,3,0>, <6,2,7,3>
+ 3759548070U, // <1,6,2,3>: Cost 4 vsldoi8 <0,2,1,6>, <2,3,0,1>
+ 3704057142U, // <1,6,2,4>: Cost 4 vsldoi4 <2,1,6,2>, RHS
+ 3373451057U, // <1,6,2,5>: Cost 4 vmrglw <3,0,1,2>, <2,4,6,5>
+ 2685806522U, // <1,6,2,6>: Cost 3 vsldoi8 <0,2,1,6>, <2,6,3,7>
+ 1225968950U, // <1,6,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1225968951U, // <1,6,2,u>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 3759548566U, // <1,6,3,0>: Cost 4 vsldoi8 <0,2,1,6>, <3,0,1,2>
+ 3842912793U, // <1,6,3,1>: Cost 4 vsldoi12 <2,u,6,1>, <6,3,1,7>
+ 3759548774U, // <1,6,3,2>: Cost 4 vsldoi8 <0,2,1,6>, <3,2,6,3>
+ 3759548828U, // <1,6,3,3>: Cost 4 vsldoi8 <0,2,1,6>, <3,3,3,3>
+ 3759548930U, // <1,6,3,4>: Cost 4 vsldoi8 <0,2,1,6>, <3,4,5,6>
+ 3809315421U, // <1,6,3,5>: Cost 4 vsldoi8 <u,5,1,6>, <3,5,6,7>
+ 3386733368U, // <1,6,3,6>: Cost 4 vmrglw <5,2,1,3>, <6,6,6,6>
+ 2283130166U, // <1,6,3,7>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 2283130167U, // <1,6,3,u>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 3704070246U, // <1,6,4,0>: Cost 4 vsldoi4 <2,1,6,4>, LHS
+ 3862229608U, // <1,6,4,1>: Cost 4 vsldoi12 <6,1,7,1>, <6,4,1,5>
+ 3704071741U, // <1,6,4,2>: Cost 4 vsldoi4 <2,1,6,4>, <2,1,6,4>
+ 3721988610U, // <1,6,4,3>: Cost 4 vsldoi4 <5,1,6,4>, <3,4,5,6>
+ 3704073526U, // <1,6,4,4>: Cost 4 vsldoi4 <2,1,6,4>, RHS
+ 2685807926U, // <1,6,4,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3865621141U, // <1,6,4,6>: Cost 4 vsldoi12 <6,6,u,1>, <6,4,6,5>
+ 2283801910U, // <1,6,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 2685808169U, // <1,6,4,u>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3710050406U, // <1,6,5,0>: Cost 4 vsldoi4 <3,1,6,5>, LHS
+ 3710051571U, // <1,6,5,1>: Cost 4 vsldoi4 <3,1,6,5>, <1,6,5,7>
+ 3405989597U, // <1,6,5,2>: Cost 4 vmrglw <u,4,1,5>, <2,3,6,2>
+ 3358214502U, // <1,6,5,3>: Cost 4 vmrglw <0,4,1,5>, <3,2,6,3>
+ 3710053686U, // <1,6,5,4>: Cost 4 vsldoi4 <3,1,6,5>, RHS
+ 3721998025U, // <1,6,5,5>: Cost 4 vsldoi4 <5,1,6,5>, <5,1,6,5>
+ 2332250936U, // <1,6,5,6>: Cost 3 vmrglw <u,4,1,5>, <6,6,6,6>
+ 1210731830U, // <1,6,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210731831U, // <1,6,5,u>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 2791289597U, // <1,6,6,0>: Cost 3 vsldoi12 <6,6,0,1>, <6,6,0,1>
+ 3698115430U, // <1,6,6,1>: Cost 4 vsldoi4 <1,1,6,6>, <1,1,6,6>
+ 3698116538U, // <1,6,6,2>: Cost 4 vsldoi4 <1,1,6,6>, <2,6,3,7>
+ 3356894132U, // <1,6,6,3>: Cost 4 vmrglw <0,2,1,6>, <1,2,6,3>
+ 3698117942U, // <1,6,6,4>: Cost 4 vsldoi4 <1,1,6,6>, RHS
+ 3722006218U, // <1,6,6,5>: Cost 4 vsldoi4 <5,1,6,6>, <5,1,6,6>
+ 2781041464U, // <1,6,6,6>: Cost 3 vsldoi12 <4,u,5,1>, <6,6,6,6>
+ 2283154742U, // <1,6,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283154743U, // <1,6,6,u>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 1718211406U, // <1,6,7,0>: Cost 2 vsldoi12 <6,7,0,1>, <6,7,0,1>
+ 2792026967U, // <1,6,7,1>: Cost 3 vsldoi12 <6,7,1,1>, <6,7,1,1>
+ 2765411170U, // <1,6,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <6,7,2,3>
+ 3854783336U, // <1,6,7,3>: Cost 4 vsldoi12 <4,u,5,1>, <6,7,3,0>
+ 2781041526U, // <1,6,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,7,4,5>
+ 3365528664U, // <1,6,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,4,6,5>
+ 2791953290U, // <1,6,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <6,7,6,7>
+ 2291789110U, // <1,6,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1718801302U, // <1,6,7,u>: Cost 2 vsldoi12 <6,7,u,1>, <6,7,u,1>
+ 1718875039U, // <1,6,u,0>: Cost 2 vsldoi12 <6,u,0,1>, <6,u,0,1>
+ 2685810478U, // <1,6,u,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2792764337U, // <1,6,u,2>: Cost 3 vsldoi12 <6,u,2,1>, <6,u,2,1>
+ 3759552444U, // <1,6,u,3>: Cost 4 vsldoi8 <0,2,1,6>, <u,3,0,1>
+ 2781041607U, // <1,6,u,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,u,4,5>
+ 2685810842U, // <1,6,u,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 2689792208U, // <1,6,u,6>: Cost 3 vsldoi8 <0,u,1,6>, <u,6,3,7>
+ 1210756406U, // <1,6,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 1210756407U, // <1,6,u,u>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 2793280496U, // <1,7,0,0>: Cost 3 vsldoi12 <7,0,0,1>, <7,0,0,1>
+ 2694439014U, // <1,7,0,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 3393343912U, // <1,7,0,2>: Cost 4 vmrglw <6,3,1,0>, <6,1,7,2>
+ 3397325306U, // <1,7,0,3>: Cost 4 vmrglw <7,0,1,0>, <6,2,7,3>
+ 2793575444U, // <1,7,0,4>: Cost 3 vsldoi12 <7,0,4,1>, <7,0,4,1>
+ 3722030797U, // <1,7,0,5>: Cost 4 vsldoi4 <5,1,7,0>, <5,1,7,0>
+ 2688467446U, // <1,7,0,6>: Cost 3 vsldoi8 <0,6,1,7>, <0,6,1,7>
+ 2689131079U, // <1,7,0,7>: Cost 3 vsldoi8 <0,7,1,7>, <0,7,1,7>
+ 2694439570U, // <1,7,0,u>: Cost 3 vsldoi8 <1,6,1,7>, <0,u,1,1>
+ 2654265354U, // <1,7,1,0>: Cost 3 vsldoi4 <6,1,7,1>, <0,0,1,1>
+ 2794017866U, // <1,7,1,1>: Cost 3 vsldoi12 <7,1,1,1>, <7,1,1,1>
+ 3768181639U, // <1,7,1,2>: Cost 4 vsldoi8 <1,6,1,7>, <1,2,1,3>
+ 2334872058U, // <1,7,1,3>: Cost 3 vmrglw <u,u,1,1>, <6,2,7,3>
+ 2654268726U, // <1,7,1,4>: Cost 3 vsldoi4 <6,1,7,1>, RHS
+ 3792069797U, // <1,7,1,5>: Cost 4 vsldoi8 <5,6,1,7>, <1,5,6,1>
+ 2694440143U, // <1,7,1,6>: Cost 3 vsldoi8 <1,6,1,7>, <1,6,1,7>
+ 2334872386U, // <1,7,1,7>: Cost 3 vmrglw <u,u,1,1>, <6,6,7,7>
+ 2695767409U, // <1,7,1,u>: Cost 3 vsldoi8 <1,u,1,7>, <1,u,1,7>
+ 2654273638U, // <1,7,2,0>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2222117973U, // <1,7,2,1>: Cost 3 vmrghw <1,2,3,0>, <7,1,2,3>
+ 2299711912U, // <1,7,2,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2654275734U, // <1,7,2,3>: Cost 3 vsldoi4 <6,1,7,2>, <3,0,1,2>
+ 2654276918U, // <1,7,2,4>: Cost 3 vsldoi4 <6,1,7,2>, RHS
+ 3385397675U, // <1,7,2,5>: Cost 4 vmrglw <5,0,1,2>, <6,1,7,5>
+ 2654278056U, // <1,7,2,6>: Cost 3 vsldoi4 <6,1,7,2>, <6,1,7,2>
+ 2323599627U, // <1,7,2,7>: Cost 3 vmrglw <7,0,1,2>, <5,u,7,7>
+ 2654279470U, // <1,7,2,u>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2795271395U, // <1,7,3,0>: Cost 3 vsldoi12 <7,3,0,1>, <7,3,0,1>
+ 3768183059U, // <1,7,3,1>: Cost 4 vsldoi8 <1,6,1,7>, <3,1,6,1>
+ 3728025254U, // <1,7,3,2>: Cost 4 vsldoi4 <6,1,7,3>, <2,3,0,1>
+ 3768183196U, // <1,7,3,3>: Cost 4 vsldoi8 <1,6,1,7>, <3,3,3,3>
+ 3768183298U, // <1,7,3,4>: Cost 4 vsldoi8 <1,6,1,7>, <3,4,5,6>
+ 3792071255U, // <1,7,3,5>: Cost 4 vsldoi8 <5,6,1,7>, <3,5,6,1>
+ 3780127361U, // <1,7,3,6>: Cost 4 vsldoi8 <3,6,1,7>, <3,6,1,7>
+ 3847779617U, // <1,7,3,7>: Cost 4 vsldoi12 <3,7,0,1>, <7,3,7,0>
+ 2795861291U, // <1,7,3,u>: Cost 3 vsldoi12 <7,3,u,1>, <7,3,u,1>
+ 2795935028U, // <1,7,4,0>: Cost 3 vsldoi12 <7,4,0,1>, <7,4,0,1>
+ 3728032975U, // <1,7,4,1>: Cost 4 vsldoi4 <6,1,7,4>, <1,6,1,7>
+ 3839153480U, // <1,7,4,2>: Cost 4 vsldoi12 <2,3,0,1>, <7,4,2,3>
+ 3397358074U, // <1,7,4,3>: Cost 4 vmrglw <7,0,1,4>, <6,2,7,3>
+ 3854783835U, // <1,7,4,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,4,4,4>
+ 2694442294U, // <1,7,4,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 3786100058U, // <1,7,4,6>: Cost 4 vsldoi8 <4,6,1,7>, <4,6,1,7>
+ 3722065254U, // <1,7,4,7>: Cost 4 vsldoi4 <5,1,7,4>, <7,4,5,6>
+ 2694442537U, // <1,7,4,u>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654298214U, // <1,7,5,0>: Cost 3 vsldoi4 <6,1,7,5>, LHS
+ 3854783893U, // <1,7,5,1>: Cost 4 vsldoi12 <4,u,5,1>, <7,5,1,u>
+ 3710126010U, // <1,7,5,2>: Cost 4 vsldoi4 <3,1,7,5>, <2,6,3,7>
+ 2332250618U, // <1,7,5,3>: Cost 3 vmrglw <u,4,1,5>, <6,2,7,3>
+ 2654301494U, // <1,7,5,4>: Cost 3 vsldoi4 <6,1,7,5>, RHS
+ 2284474795U, // <1,7,5,5>: Cost 3 vmrglw <0,4,1,5>, <6,1,7,5>
+ 2718330931U, // <1,7,5,6>: Cost 3 vsldoi8 <5,6,1,7>, <5,6,1,7>
+ 2332250946U, // <1,7,5,7>: Cost 3 vmrglw <u,4,1,5>, <6,6,7,7>
+ 2719658197U, // <1,7,5,u>: Cost 3 vsldoi8 <5,u,1,7>, <5,u,1,7>
+ 2332921954U, // <1,7,6,0>: Cost 3 vmrglw <u,5,1,6>, <5,6,7,0>
+ 3768185254U, // <1,7,6,1>: Cost 4 vsldoi8 <1,6,1,7>, <6,1,7,0>
+ 3710134202U, // <1,7,6,2>: Cost 4 vsldoi4 <3,1,7,6>, <2,6,3,7>
+ 3710134561U, // <1,7,6,3>: Cost 4 vsldoi4 <3,1,7,6>, <3,1,7,6>
+ 3710135606U, // <1,7,6,4>: Cost 4 vsldoi4 <3,1,7,6>, RHS
+ 3864884745U, // <1,7,6,5>: Cost 4 vsldoi12 <6,5,7,1>, <7,6,5,7>
+ 3854784017U, // <1,7,6,6>: Cost 4 vsldoi12 <4,u,5,1>, <7,6,6,6>
+ 2791953940U, // <1,7,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <7,6,7,0>
+ 2792617501U, // <1,7,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <7,6,u,0>
+ 2797925927U, // <1,7,7,0>: Cost 3 vsldoi12 <7,7,0,1>, <7,7,0,1>
+ 3365528426U, // <1,7,7,1>: Cost 4 vmrglw <1,6,1,7>, <1,1,7,1>
+ 3728058022U, // <1,7,7,2>: Cost 4 vsldoi4 <6,1,7,7>, <2,3,0,1>
+ 3365528509U, // <1,7,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,7,3>
+ 3854784079U, // <1,7,7,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,7,4,5>
+ 3722088148U, // <1,7,7,5>: Cost 4 vsldoi4 <5,1,7,7>, <5,1,7,7>
+ 3728060845U, // <1,7,7,6>: Cost 4 vsldoi4 <6,1,7,7>, <6,1,7,7>
+ 2781042284U, // <1,7,7,7>: Cost 3 vsldoi12 <4,u,5,1>, <7,7,7,7>
+ 2798515823U, // <1,7,7,u>: Cost 3 vsldoi12 <7,7,u,1>, <7,7,u,1>
+ 2654322705U, // <1,7,u,0>: Cost 3 vsldoi4 <6,1,7,u>, <0,0,1,u>
+ 2694444846U, // <1,7,u,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 2299711912U, // <1,7,u,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2323649018U, // <1,7,u,3>: Cost 3 vmrglw <7,0,1,u>, <6,2,7,3>
+ 2654326070U, // <1,7,u,4>: Cost 3 vsldoi4 <6,1,7,u>, RHS
+ 2694445210U, // <1,7,u,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654327214U, // <1,7,u,6>: Cost 3 vsldoi4 <6,1,7,u>, <6,1,7,u>
+ 2323649346U, // <1,7,u,7>: Cost 3 vmrglw <7,0,1,u>, <6,6,7,7>
+ 2694445413U, // <1,7,u,u>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 1610752017U, // <1,u,0,0>: Cost 2 vsldoi8 <0,0,1,u>, <0,0,1,u>
+ 1613406310U, // <1,u,0,1>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 2685821107U, // <1,u,0,2>: Cost 3 vsldoi8 <0,2,1,u>, <0,2,1,u>
+ 2283765916U, // <1,u,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 1613406549U, // <1,u,0,4>: Cost 2 vsldoi8 <0,4,1,u>, <0,4,1,u>
+ 1725880054U, // <1,u,0,5>: Cost 2 vsldoi12 <u,0,5,1>, <u,0,5,1>
+ 2688475639U, // <1,u,0,6>: Cost 3 vsldoi8 <0,6,1,u>, <0,6,1,u>
+ 2283769160U, // <1,u,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 1613406877U, // <1,u,0,u>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 1550221414U, // <1,u,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,u,1,1>: Cost 1 vspltisw1 LHS
+ 1683117870U, // <1,u,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1213350044U, // <1,u,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 1550224694U, // <1,u,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1147574426U, // <1,u,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2687149326U, // <1,u,1,6>: Cost 3 vsldoi8 <0,4,1,u>, <1,6,u,7>
+ 1213353288U, // <1,u,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 269271142U, // <1,u,1,u>: Cost 1 vspltisw1 LHS
+ 2222118611U, // <1,u,2,0>: Cost 3 vmrghw <1,2,3,0>, <u,0,1,2>
+ 1148376878U, // <1,u,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 1148371862U, // <1,u,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 1225965724U, // <1,u,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2222118975U, // <1,u,2,4>: Cost 3 vmrghw <1,2,3,0>, <u,4,5,6>
+ 1148377242U, // <1,u,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2687150010U, // <1,u,2,6>: Cost 3 vsldoi8 <0,4,1,u>, <2,6,3,7>
+ 1225968968U, // <1,u,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1148377445U, // <1,u,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 471040156U, // <1,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544782644U, // <1,u,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544783464U, // <1,u,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544784022U, // <1,u,3,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471043382U, // <1,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592561668U, // <1,u,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592562170U, // <1,u,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592562682U, // <1,u,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 471045934U, // <1,u,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708384629U, // <1,u,4,0>: Cost 3 vsldoi8 <4,0,1,u>, <4,0,1,u>
+ 2687151101U, // <1,u,4,1>: Cost 3 vsldoi8 <0,4,1,u>, <4,1,u,0>
+ 2223408022U, // <1,u,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 2283798684U, // <1,u,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2642422785U, // <1,u,4,4>: Cost 3 vsldoi4 <4,1,u,4>, <4,1,u,4>
+ 1613409590U, // <1,u,4,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2283801090U, // <1,u,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 2283801928U, // <1,u,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 1613409833U, // <1,u,4,u>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2284471235U, // <1,u,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,0>
+ 2284472046U, // <1,u,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,u,1>
+ 2284472533U, // <1,u,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,u,2>
+ 1210728604U, // <1,u,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2284471239U, // <1,u,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,4>
+ 1210728786U, // <1,u,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 1683118234U, // <1,u,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210731848U, // <1,u,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210728609U, // <1,u,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720330023U, // <1,u,6,0>: Cost 3 vsldoi8 <6,0,1,u>, <6,0,1,u>
+ 2757376190U, // <1,u,6,1>: Cost 3 vsldoi12 <0,u,u,1>, <u,6,1,7>
+ 2726302202U, // <1,u,6,2>: Cost 3 vsldoi8 <7,0,1,u>, <6,2,7,3>
+ 2283151516U, // <1,u,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 2224972114U, // <1,u,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 2224683162U, // <1,u,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726302520U, // <1,u,6,6>: Cost 3 vsldoi8 <7,0,1,u>, <6,6,6,6>
+ 2283154760U, // <1,u,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283151521U, // <1,u,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652560896U, // <1,u,7,0>: Cost 2 vsldoi8 <7,0,1,u>, <7,0,1,u>
+ 2333590225U, // <1,u,7,1>: Cost 3 vmrglw <u,6,1,7>, <0,u,u,1>
+ 2765412628U, // <1,u,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <u,7,2,3>
+ 2291785884U, // <1,u,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2781042984U, // <1,u,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <u,7,4,5>
+ 3365527953U, // <1,u,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,u,5>
+ 2791954748U, // <1,u,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <u,7,6,7>
+ 2291789128U, // <1,u,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1657869960U, // <1,u,7,u>: Cost 2 vsldoi8 <7,u,1,u>, <7,u,1,u>
+ 471081121U, // <1,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 269271142U, // <1,u,u,1>: Cost 1 vspltisw1 LHS
+ 1544824424U, // <1,u,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544824982U, // <1,u,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471084342U, // <1,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1613412506U, // <1,u,u,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 1683118477U, // <1,u,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210756424U, // <1,u,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 471086894U, // <1,u,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2226757632U, // <2,0,0,0>: Cost 3 vmrghw <2,0,3,0>, <0,0,0,0>
+ 2226757734U, // <2,0,0,1>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 3826622483U, // <2,0,0,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,0,2,1>
+ 3843211292U, // <2,0,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,0,3,1>
+ 3300499794U, // <2,0,0,4>: Cost 4 vmrghw <2,0,3,0>, <0,4,1,5>
+ 3356256724U, // <2,0,0,5>: Cost 4 vmrglw <0,1,2,0>, <3,4,0,5>
+ 3825664056U, // <2,0,0,6>: Cost 4 vsldoi12 <0,0,6,2>, <0,0,6,2>
+ 3762889289U, // <2,0,0,7>: Cost 4 vsldoi8 <0,7,2,0>, <0,7,2,0>
+ 2226758301U, // <2,0,0,u>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 2227429386U, // <2,0,1,0>: Cost 3 vmrghw <2,1,3,1>, <0,0,1,1>
+ 2227429478U, // <2,0,1,1>: Cost 3 vmrghw <2,1,3,1>, LHS
+ 1691156582U, // <2,0,1,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2666358997U, // <2,0,1,3>: Cost 3 vsldoi4 <u,2,0,1>, <3,0,u,2>
+ 2227462482U, // <2,0,1,4>: Cost 3 vmrghw <2,1,3,5>, <0,4,1,5>
+ 3722186464U, // <2,0,1,5>: Cost 4 vsldoi4 <5,2,0,1>, <5,2,0,1>
+ 3867099278U, // <2,0,1,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,1,6,7>
+ 3366881912U, // <2,0,1,7>: Cost 4 vmrglw <1,u,2,1>, <3,6,0,7>
+ 1691156636U, // <2,0,1,u>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2228027392U, // <2,0,2,0>: Cost 3 vmrghw <2,2,2,2>, <0,0,0,0>
+ 1154285670U, // <2,0,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 2228027565U, // <2,0,2,2>: Cost 3 vmrghw <2,2,2,2>, <0,2,1,2>
+ 3301769468U, // <2,0,2,3>: Cost 4 vmrghw <2,2,2,2>, <0,3,1,0>
+ 2228027730U, // <2,0,2,4>: Cost 3 vmrghw <2,2,2,2>, <0,4,1,5>
+ 3301769635U, // <2,0,2,5>: Cost 4 vmrghw <2,2,2,2>, <0,5,1,5>
+ 3780806586U, // <2,0,2,6>: Cost 4 vsldoi8 <3,7,2,0>, <2,6,3,7>
+ 3368880760U, // <2,0,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,0,7>
+ 1154286237U, // <2,0,2,u>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 1213440000U, // <2,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1213441702U, // <2,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2228535470U, // <2,0,3,2>: Cost 3 vmrghw <2,3,0,1>, <0,2,1,3>
+ 2636515632U, // <2,0,3,3>: Cost 3 vsldoi4 <3,2,0,3>, <3,2,0,3>
+ 2287182962U, // <2,0,3,4>: Cost 3 vmrglw LHS, <1,5,0,4>
+ 2660405346U, // <2,0,3,5>: Cost 3 vsldoi4 <7,2,0,3>, <5,6,7,0>
+ 2228535798U, // <2,0,3,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660406420U, // <2,0,3,7>: Cost 3 vsldoi4 <7,2,0,3>, <7,2,0,3>
+ 1213441709U, // <2,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3368894464U, // <2,0,4,0>: Cost 4 vmrglw <2,2,2,4>, <0,0,0,0>
+ 2764898642U, // <2,0,4,1>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,1,5>
+ 3826622811U, // <2,0,4,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,4,2,5>
+ 3843211620U, // <2,0,4,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,4,3,5>
+ 3838640493U, // <2,0,4,4>: Cost 4 vsldoi12 <2,2,2,2>, <0,4,4,5>
+ 2732944694U, // <2,0,4,5>: Cost 3 vsldoi8 <u,1,2,0>, RHS
+ 3797396857U, // <2,0,4,6>: Cost 4 vsldoi8 <6,5,2,0>, <4,6,5,2>
+ 3867099528U, // <2,0,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <0,4,7,5>
+ 2764898705U, // <2,0,4,u>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,u,5>
+ 3364257792U, // <2,0,5,0>: Cost 4 vmrglw <1,4,2,5>, <0,0,0,0>
+ 2230124646U, // <2,0,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 3304235184U, // <2,0,5,2>: Cost 4 vmrghw <2,5,u,6>, <0,2,1,5>
+ 3364260144U, // <2,0,5,3>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,3>
+ 3303817554U, // <2,0,5,4>: Cost 4 vmrghw <2,5,3,0>, <0,4,1,5>
+ 3364260146U, // <2,0,5,5>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,5>
+ 3867099602U, // <2,0,5,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,5,6,7>
+ 3364260472U, // <2,0,5,7>: Cost 4 vmrglw <1,4,2,5>, <3,6,0,7>
+ 2230125213U, // <2,0,5,u>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2230796288U, // <2,0,6,0>: Cost 3 vmrghw <2,6,3,7>, <0,0,0,0>
+ 1157054566U, // <2,0,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 2230796465U, // <2,0,6,2>: Cost 3 vmrghw <2,6,3,7>, <0,2,1,6>
+ 3304538364U, // <2,0,6,3>: Cost 4 vmrghw <2,6,3,7>, <0,3,1,0>
+ 2230796626U, // <2,0,6,4>: Cost 3 vmrghw <2,6,3,7>, <0,4,1,5>
+ 3797398205U, // <2,0,6,5>: Cost 4 vsldoi8 <6,5,2,0>, <6,5,2,0>
+ 3304538614U, // <2,0,6,6>: Cost 4 vmrghw <2,6,3,7>, <0,6,1,7>
+ 3798725471U, // <2,0,6,7>: Cost 4 vsldoi8 <6,7,2,0>, <6,7,2,0>
+ 1157055133U, // <2,0,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 3371573248U, // <2,0,7,0>: Cost 4 vmrglw <2,6,2,7>, <0,0,0,0>
+ 2231189606U, // <2,0,7,1>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 3801380003U, // <2,0,7,2>: Cost 4 vsldoi8 <7,2,2,0>, <7,2,2,0>
+ 3802043636U, // <2,0,7,3>: Cost 4 vsldoi8 <7,3,2,0>, <7,3,2,0>
+ 3806688614U, // <2,0,7,4>: Cost 4 vsldoi8 <u,1,2,0>, <7,4,5,6>
+ 3356317308U, // <2,0,7,5>: Cost 4 vmrglw <0,1,2,7>, <7,u,0,5>
+ 3804034535U, // <2,0,7,6>: Cost 4 vsldoi8 <7,6,2,0>, <7,6,2,0>
+ 3806688876U, // <2,0,7,7>: Cost 4 vsldoi8 <u,1,2,0>, <7,7,7,7>
+ 2231190173U, // <2,0,7,u>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 1208836096U, // <2,0,u,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1208837798U, // <2,0,u,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 1691157149U, // <2,0,u,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2636556597U, // <2,0,u,3>: Cost 3 vsldoi4 <3,2,0,u>, <3,2,0,u>
+ 2282579625U, // <2,0,u,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2660446306U, // <2,0,u,5>: Cost 3 vsldoi4 <7,2,0,u>, <5,6,7,0>
+ 2228535798U, // <2,0,u,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660447385U, // <2,0,u,7>: Cost 3 vsldoi4 <7,2,0,u>, <7,2,0,u>
+ 1208837805U, // <2,0,u,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3692388523U, // <2,1,0,0>: Cost 4 vsldoi4 <0,2,1,0>, <0,2,1,0>
+ 2757526244U, // <2,1,0,1>: Cost 3 vsldoi12 <1,0,1,2>, <1,0,1,2>
+ 2330290974U, // <2,1,0,2>: Cost 3 vmrglw <u,1,2,0>, <3,u,1,2>
+ 3843212020U, // <2,1,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <1,0,3,0>
+ 3692391734U, // <2,1,0,4>: Cost 4 vsldoi4 <0,2,1,0>, RHS
+ 3300533362U, // <2,1,0,5>: Cost 4 vmrghw <2,0,3,4>, <1,5,0,4>
+ 3794084337U, // <2,1,0,6>: Cost 4 vsldoi8 <6,0,2,1>, <0,6,1,2>
+ 3374170614U, // <2,1,0,7>: Cost 5 vmrglw <3,1,2,0>, <0,6,1,7>
+ 2758042403U, // <2,1,0,u>: Cost 3 vsldoi12 <1,0,u,2>, <1,0,u,2>
+ 2690482924U, // <2,1,1,0>: Cost 3 vsldoi8 <1,0,2,1>, <1,0,2,1>
+ 2764899124U, // <2,1,1,1>: Cost 3 vsldoi12 <2,2,2,2>, <1,1,1,1>
+ 2695791510U, // <2,1,1,2>: Cost 3 vsldoi8 <1,u,2,1>, <1,2,3,0>
+ 3362235271U, // <2,1,1,3>: Cost 4 vmrglw <1,1,2,1>, <1,2,1,3>
+ 3692399926U, // <2,1,1,4>: Cost 4 vsldoi4 <0,2,1,1>, RHS
+ 3832226649U, // <2,1,1,5>: Cost 4 vsldoi12 <1,1,5,2>, <1,1,5,2>
+ 3301205235U, // <2,1,1,6>: Cost 4 vmrghw <2,1,3,5>, <1,6,5,7>
+ 3768870179U, // <2,1,1,7>: Cost 4 vsldoi8 <1,7,2,1>, <1,7,2,1>
+ 2695791988U, // <2,1,1,u>: Cost 3 vsldoi8 <1,u,2,1>, <1,u,2,1>
+ 2618663085U, // <2,1,2,0>: Cost 3 vsldoi4 <0,2,1,2>, <0,2,1,2>
+ 2228028212U, // <2,1,2,1>: Cost 3 vmrghw <2,2,2,2>, <1,1,1,1>
+ 2618664552U, // <2,1,2,2>: Cost 3 vsldoi4 <0,2,1,2>, <2,2,2,2>
+ 2759000984U, // <2,1,2,3>: Cost 3 vsldoi12 <1,2,3,2>, <1,2,3,2>
+ 2618666294U, // <2,1,2,4>: Cost 3 vsldoi4 <0,2,1,2>, RHS
+ 2295136594U, // <2,1,2,5>: Cost 3 vmrglw <2,2,2,2>, <0,4,1,5>
+ 3769534376U, // <2,1,2,6>: Cost 4 vsldoi8 <1,u,2,1>, <2,6,1,7>
+ 2793358266U, // <2,1,2,7>: Cost 3 vsldoi12 <7,0,1,2>, <1,2,7,0>
+ 2618668846U, // <2,1,2,u>: Cost 3 vsldoi4 <0,2,1,2>, LHS
+ 2282536969U, // <2,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208795146U, // <2,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1213442198U, // <2,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2287181998U, // <2,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2618674486U, // <2,1,3,4>: Cost 3 vsldoi4 <0,2,1,3>, RHS
+ 1208795474U, // <2,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2287182001U, // <2,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287183055U, // <2,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208795153U, // <2,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 3692421295U, // <2,1,4,0>: Cost 4 vsldoi4 <0,2,1,4>, <0,2,1,4>
+ 3838641195U, // <2,1,4,1>: Cost 4 vsldoi12 <2,2,2,2>, <1,4,1,5>
+ 2330323742U, // <2,1,4,2>: Cost 3 vmrglw <u,1,2,4>, <3,u,1,2>
+ 3692423318U, // <2,1,4,3>: Cost 5 vsldoi4 <0,2,1,4>, <3,0,1,2>
+ 3692424502U, // <2,1,4,4>: Cost 4 vsldoi4 <0,2,1,4>, RHS
+ 2695793974U, // <2,1,4,5>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3799395705U, // <2,1,4,6>: Cost 4 vsldoi8 <6,u,2,1>, <4,6,5,2>
+ 3368895695U, // <2,1,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,1,7>
+ 2695794217U, // <2,1,4,u>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3692429488U, // <2,1,5,0>: Cost 4 vsldoi4 <0,2,1,5>, <0,2,1,5>
+ 3364257802U, // <2,1,5,1>: Cost 4 vmrglw <1,4,2,5>, <0,0,1,1>
+ 3692431253U, // <2,1,5,2>: Cost 4 vsldoi4 <0,2,1,5>, <2,5,u,6>
+ 3692431874U, // <2,1,5,3>: Cost 4 vsldoi4 <0,2,1,5>, <3,4,5,6>
+ 3692432694U, // <2,1,5,4>: Cost 4 vsldoi4 <0,2,1,5>, RHS
+ 3364258130U, // <2,1,5,5>: Cost 4 vmrglw <1,4,2,5>, <0,4,1,5>
+ 3303875827U, // <2,1,5,6>: Cost 4 vmrghw <2,5,3,7>, <1,6,5,7>
+ 3867100333U, // <2,1,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <1,5,7,0>
+ 3692435246U, // <2,1,5,u>: Cost 4 vsldoi4 <0,2,1,5>, LHS
+ 2618695857U, // <2,1,6,0>: Cost 3 vsldoi4 <0,2,1,6>, <0,2,1,6>
+ 2230797108U, // <2,1,6,1>: Cost 3 vmrghw <2,6,3,7>, <1,1,1,1>
+ 2618697658U, // <2,1,6,2>: Cost 3 vsldoi4 <0,2,1,6>, <2,6,3,7>
+ 3692439702U, // <2,1,6,3>: Cost 4 vsldoi4 <0,2,1,6>, <3,0,1,2>
+ 2618699062U, // <2,1,6,4>: Cost 3 vsldoi4 <0,2,1,6>, RHS
+ 3364929874U, // <2,1,6,5>: Cost 4 vmrglw <1,5,2,6>, <0,4,1,5>
+ 3692442424U, // <2,1,6,6>: Cost 4 vsldoi4 <0,2,1,6>, <6,6,6,6>
+ 3798733664U, // <2,1,6,7>: Cost 4 vsldoi8 <6,7,2,1>, <6,7,2,1>
+ 2618701614U, // <2,1,6,u>: Cost 3 vsldoi4 <0,2,1,6>, LHS
+ 3799397370U, // <2,1,7,0>: Cost 4 vsldoi8 <6,u,2,1>, <7,0,1,2>
+ 3371573258U, // <2,1,7,1>: Cost 4 vmrglw <2,6,2,7>, <0,0,1,1>
+ 2330351234U, // <2,1,7,2>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 3799397658U, // <2,1,7,3>: Cost 4 vsldoi8 <6,u,2,1>, <7,3,6,2>
+ 3799397734U, // <2,1,7,4>: Cost 4 vsldoi8 <6,u,2,1>, <7,4,5,6>
+ 3371573586U, // <2,1,7,5>: Cost 4 vmrglw <2,6,2,7>, <0,4,1,5>
+ 3799397870U, // <2,1,7,6>: Cost 4 vsldoi8 <6,u,2,1>, <7,6,2,7>
+ 3799397956U, // <2,1,7,7>: Cost 4 vsldoi8 <6,u,2,1>, <7,7,3,3>
+ 2330351234U, // <2,1,7,u>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 2282577929U, // <2,1,u,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208836106U, // <2,1,u,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1208838294U, // <2,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282578094U, // <2,1,u,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282577933U, // <2,1,u,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1208836434U, // <2,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282578097U, // <2,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287224015U, // <2,1,u,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208836113U, // <2,1,u,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 2226759117U, // <2,2,0,0>: Cost 3 vmrghw <2,0,3,0>, <2,0,3,0>
+ 1624047718U, // <2,2,0,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 2697789613U, // <2,2,0,2>: Cost 3 vsldoi8 <2,2,2,2>, <0,2,1,2>
+ 2226767526U, // <2,2,0,3>: Cost 3 vmrghw <2,0,3,1>, <2,3,0,1>
+ 2697789778U, // <2,2,0,4>: Cost 3 vsldoi8 <2,2,2,2>, <0,4,1,5>
+ 3300657000U, // <2,2,0,5>: Cost 4 vmrghw <2,0,5,1>, <2,5,3,6>
+ 2226988986U, // <2,2,0,6>: Cost 3 vmrghw <2,0,6,1>, <2,6,3,7>
+ 3734271139U, // <2,2,0,7>: Cost 4 vsldoi4 <7,2,2,0>, <7,2,2,0>
+ 1624048285U, // <2,2,0,u>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 3831268868U, // <2,2,1,0>: Cost 4 vsldoi12 <1,0,1,2>, <2,1,0,1>
+ 2293138804U, // <2,2,1,1>: Cost 3 vmrglw <1,u,2,1>, <1,u,2,1>
+ 2697790358U, // <2,2,1,2>: Cost 3 vsldoi8 <2,2,2,2>, <1,2,3,0>
+ 2293137510U, // <2,2,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 3771532331U, // <2,2,1,4>: Cost 4 vsldoi8 <2,2,2,2>, <1,4,1,5>
+ 3767551106U, // <2,2,1,5>: Cost 4 vsldoi8 <1,5,2,2>, <1,5,2,2>
+ 3301173178U, // <2,2,1,6>: Cost 4 vmrghw <2,1,3,1>, <2,6,3,7>
+ 3372853169U, // <2,2,1,7>: Cost 4 vmrglw <2,u,2,1>, <2,6,2,7>
+ 2293137515U, // <2,2,1,u>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 1556938854U, // <2,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2295137733U, // <2,2,2,1>: Cost 3 vmrglw <2,2,2,2>, <2,0,2,1>
+ 336380006U, // <2,2,2,2>: Cost 1 vspltisw2 LHS
+ 1221394534U, // <2,2,2,3>: Cost 2 vmrglw <2,2,2,2>, LHS
+ 1556942134U, // <2,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <2,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2228029370U, // <2,2,2,6>: Cost 3 vmrghw <2,2,2,2>, <2,6,3,7>
+ 2660545701U, // <2,2,2,7>: Cost 3 vsldoi4 <7,2,2,2>, <7,2,2,2>
+ 336380006U, // <2,2,2,u>: Cost 1 vspltisw2 LHS
+ 2697791638U, // <2,2,3,0>: Cost 3 vsldoi8 <2,2,2,2>, <3,0,1,2>
+ 2765489840U, // <2,2,3,1>: Cost 3 vsldoi12 <2,3,1,2>, <2,3,1,2>
+ 1213441640U, // <2,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135053414U, // <2,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 2697792002U, // <2,2,3,4>: Cost 3 vsldoi8 <2,2,2,2>, <3,4,5,6>
+ 2330313780U, // <2,2,3,5>: Cost 3 vmrglw LHS, <1,4,2,5>
+ 2287183549U, // <2,2,3,6>: Cost 3 vmrglw LHS, <2,3,2,6>
+ 2660553894U, // <2,2,3,7>: Cost 3 vsldoi4 <7,2,2,3>, <7,2,2,3>
+ 135053419U, // <2,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2630697062U, // <2,2,4,0>: Cost 3 vsldoi4 <2,2,2,4>, LHS
+ 3771534282U, // <2,2,4,1>: Cost 4 vsldoi8 <2,2,2,2>, <4,1,2,3>
+ 2764900109U, // <2,2,4,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,4,2,5>
+ 2295152742U, // <2,2,4,3>: Cost 3 vmrglw <2,2,2,4>, LHS
+ 2295154282U, // <2,2,4,4>: Cost 3 vmrglw <2,2,2,4>, <2,2,2,4>
+ 1624050998U, // <2,2,4,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 2229675962U, // <2,2,4,6>: Cost 3 vmrghw <2,4,6,5>, <2,6,3,7>
+ 3368896433U, // <2,2,4,7>: Cost 4 vmrglw <2,2,2,4>, <2,6,2,7>
+ 1624051241U, // <2,2,4,u>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 3771534920U, // <2,2,5,0>: Cost 4 vsldoi8 <2,2,2,2>, <5,0,1,2>
+ 3364258540U, // <2,2,5,1>: Cost 4 vmrglw <1,4,2,5>, <1,0,2,1>
+ 2296489576U, // <2,2,5,2>: Cost 3 vmrglw <2,4,2,5>, <2,2,2,2>
+ 2290516070U, // <2,2,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 3771535284U, // <2,2,5,4>: Cost 4 vsldoi8 <2,2,2,2>, <5,4,5,6>
+ 2290517044U, // <2,2,5,5>: Cost 3 vmrglw <1,4,2,5>, <1,4,2,5>
+ 2697793634U, // <2,2,5,6>: Cost 3 vsldoi8 <2,2,2,2>, <5,6,7,0>
+ 3370231729U, // <2,2,5,7>: Cost 4 vmrglw <2,4,2,5>, <2,6,2,7>
+ 2290516075U, // <2,2,5,u>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2230797801U, // <2,2,6,0>: Cost 3 vmrghw <2,6,3,7>, <2,0,6,1>
+ 3304539679U, // <2,2,6,1>: Cost 4 vmrghw <2,6,3,7>, <2,1,3,1>
+ 2764900273U, // <2,2,6,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,2,7>
+ 2764900282U, // <2,2,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,3,7>
+ 2230798129U, // <2,2,6,4>: Cost 3 vmrghw <2,6,3,7>, <2,4,6,5>
+ 3304540008U, // <2,2,6,5>: Cost 4 vmrghw <2,6,3,7>, <2,5,3,6>
+ 1157056442U, // <2,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725000033U, // <2,2,6,7>: Cost 3 vsldoi8 <6,7,2,2>, <6,7,2,2>
+ 1157056442U, // <2,2,6,u>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2793359338U, // <2,2,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <2,7,0,1>
+ 3371574725U, // <2,2,7,1>: Cost 4 vmrglw <2,6,2,7>, <2,0,2,1>
+ 2297833064U, // <2,2,7,2>: Cost 3 vmrglw <2,6,2,7>, <2,2,2,2>
+ 2297831526U, // <2,2,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 2697794918U, // <2,2,7,4>: Cost 3 vsldoi8 <2,2,2,2>, <7,4,5,6>
+ 3371575053U, // <2,2,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,2,5>
+ 3304933297U, // <2,2,7,6>: Cost 4 vmrghw <2,7,0,1>, <2,6,2,7>
+ 2297833393U, // <2,2,7,7>: Cost 3 vmrglw <2,6,2,7>, <2,6,2,7>
+ 2297831531U, // <2,2,7,u>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1556938854U, // <2,2,u,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1624053550U, // <2,2,u,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 336380006U, // <2,2,u,2>: Cost 1 vspltisw2 LHS
+ 135094374U, // <2,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 1556942134U, // <2,2,u,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1624053914U, // <2,2,u,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 1157056442U, // <2,2,u,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2660594859U, // <2,2,u,7>: Cost 3 vsldoi4 <7,2,2,u>, <7,2,2,u>
+ 135094379U, // <2,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611448320U, // <2,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537706598U, // <2,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2689835181U, // <2,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2689835260U, // <2,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611448658U, // <2,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2732966354U, // <2,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2732966390U, // <2,3,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660603052U, // <2,3,0,7>: Cost 3 vsldoi4 <7,2,3,0>, <7,2,3,0>
+ 537707165U, // <2,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689835748U, // <2,3,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611449140U, // <2,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611449238U, // <2,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 3763577805U, // <2,3,1,3>: Cost 4 vsldoi8 LHS, <1,3,0,1>
+ 2689836112U, // <2,3,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689836143U, // <2,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689836239U, // <2,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 3366881210U, // <2,3,1,7>: Cost 4 vmrglw <1,u,2,1>, <2,6,3,7>
+ 1616094588U, // <2,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2689836493U, // <2,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,3,0>
+ 2685191711U, // <2,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611449960U, // <2,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611450022U, // <2,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2689836822U, // <2,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,3,5>
+ 2689836904U, // <2,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,3,6>
+ 1611450298U, // <2,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2295138234U, // <2,3,2,7>: Cost 3 vmrglw <2,2,2,2>, <2,6,3,7>
+ 1611450456U, // <2,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,3,3>
+ 1213440918U, // <2,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282538527U, // <2,3,3,1>: Cost 3 vmrglw LHS, <2,1,3,1>
+ 1557022322U, // <2,3,3,2>: Cost 2 vsldoi4 <2,2,3,3>, <2,2,3,3>
+ 1208796786U, // <2,3,3,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1213440922U, // <2,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282538531U, // <2,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2287188094U, // <2,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1213441978U, // <2,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 1208796791U, // <2,3,3,u>: Cost 2 vmrglw LHS, <2,2,3,u>
+ 1551056998U, // <2,3,4,0>: Cost 2 vsldoi4 <1,2,3,4>, LHS
+ 1551057818U, // <2,3,4,1>: Cost 2 vsldoi4 <1,2,3,4>, <1,2,3,4>
+ 2624800360U, // <2,3,4,2>: Cost 3 vsldoi4 <1,2,3,4>, <2,2,2,2>
+ 2624800918U, // <2,3,4,3>: Cost 3 vsldoi4 <1,2,3,4>, <3,0,1,2>
+ 1551060278U, // <2,3,4,4>: Cost 2 vsldoi4 <1,2,3,4>, RHS
+ 537709878U, // <2,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2732969337U, // <2,3,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2660635824U, // <2,3,4,7>: Cost 3 vsldoi4 <7,2,3,4>, <7,2,3,4>
+ 537710121U, // <2,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689838664U, // <2,3,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2732969615U, // <2,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2732969707U, // <2,3,5,2>: Cost 3 vsldoi8 LHS, <5,2,1,3>
+ 3763580721U, // <2,3,5,3>: Cost 4 vsldoi8 LHS, <5,3,0,1>
+ 2689839028U, // <2,3,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659228164U, // <2,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659228258U, // <2,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 3364259770U, // <2,3,5,7>: Cost 4 vmrglw <1,4,2,5>, <2,6,3,7>
+ 1659228420U, // <2,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2230798486U, // <2,3,6,0>: Cost 3 vmrghw <2,6,3,7>, <3,0,1,2>
+ 2732970407U, // <2,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1659228666U, // <2,3,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2230798748U, // <2,3,6,3>: Cost 3 vmrghw <2,6,3,7>, <3,3,3,3>
+ 2230798850U, // <2,3,6,4>: Cost 3 vmrghw <2,6,3,7>, <3,4,5,6>
+ 2732970731U, // <2,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659228984U, // <2,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659229006U, // <2,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1659229087U, // <2,3,6,u>: Cost 2 vsldoi8 LHS, <6,u,0,1>
+ 1659229178U, // <2,3,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2726999125U, // <2,3,7,1>: Cost 3 vsldoi8 <7,1,2,3>, <7,1,2,3>
+ 2727662758U, // <2,3,7,2>: Cost 3 vsldoi8 <7,2,2,3>, <7,2,2,3>
+ 2732971235U, // <2,3,7,3>: Cost 3 vsldoi8 LHS, <7,3,0,1>
+ 1659229542U, // <2,3,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2732971446U, // <2,3,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2732971484U, // <2,3,7,6>: Cost 3 vsldoi8 LHS, <7,6,0,7>
+ 1659229804U, // <2,3,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659229826U, // <2,3,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1208837014U, // <2,3,u,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 537712430U, // <2,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1616099205U, // <2,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,0>
+ 1208837746U, // <2,3,u,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1208837018U, // <2,3,u,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 537712794U, // <2,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1616099536U, // <2,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1208838074U, // <2,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 537712997U, // <2,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 3771547648U, // <2,4,0,0>: Cost 4 vsldoi8 <2,2,2,4>, <0,0,0,0>
+ 2697805926U, // <2,4,0,1>: Cost 3 vsldoi8 <2,2,2,4>, LHS
+ 3770884269U, // <2,4,0,2>: Cost 4 vsldoi8 <2,1,2,4>, <0,2,1,2>
+ 3806716164U, // <2,4,0,3>: Cost 4 vsldoi8 <u,1,2,4>, <0,3,1,u>
+ 3771547986U, // <2,4,0,4>: Cost 4 vsldoi8 <2,2,2,4>, <0,4,1,5>
+ 2226761014U, // <2,4,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3853462427U, // <2,4,0,6>: Cost 4 vsldoi12 <4,6,5,2>, <4,0,6,1>
+ 3867102116U, // <2,4,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,0,7,1>
+ 2226761257U, // <2,4,0,u>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3849186231U, // <2,4,1,0>: Cost 4 vsldoi12 <4,0,1,2>, <4,1,0,2>
+ 3301207010U, // <2,4,1,1>: Cost 4 vmrghw <2,1,3,5>, <4,1,5,0>
+ 3766240150U, // <2,4,1,2>: Cost 4 vsldoi8 <1,3,2,4>, <1,2,3,0>
+ 3766240226U, // <2,4,1,3>: Cost 4 vsldoi8 <1,3,2,4>, <1,3,2,4>
+ 3301207248U, // <2,4,1,4>: Cost 4 vmrghw <2,1,3,5>, <4,4,4,4>
+ 2227432758U, // <2,4,1,5>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 3758941400U, // <2,4,1,6>: Cost 4 vsldoi8 <0,1,2,4>, <1,6,2,7>
+ 3768894758U, // <2,4,1,7>: Cost 4 vsldoi8 <1,7,2,4>, <1,7,2,4>
+ 2227433001U, // <2,4,1,u>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 2228030354U, // <2,4,2,0>: Cost 3 vmrghw <2,2,2,2>, <4,0,5,1>
+ 3770885657U, // <2,4,2,1>: Cost 4 vsldoi8 <2,1,2,4>, <2,1,2,4>
+ 2697807466U, // <2,4,2,2>: Cost 3 vsldoi8 <2,2,2,4>, <2,2,2,4>
+ 3368880468U, // <2,4,2,3>: Cost 4 vmrglw <2,2,2,2>, <3,2,4,3>
+ 2228030672U, // <2,4,2,4>: Cost 3 vmrghw <2,2,2,2>, <4,4,4,4>
+ 1154288950U, // <2,4,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 3771549617U, // <2,4,2,6>: Cost 4 vsldoi8 <2,2,2,4>, <2,6,2,7>
+ 3368880796U, // <2,4,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,4,7>
+ 1154289193U, // <2,4,2,u>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 2636808294U, // <2,4,3,0>: Cost 3 vsldoi4 <3,2,4,3>, LHS
+ 2287181861U, // <2,4,3,1>: Cost 3 vmrglw LHS, <0,0,4,1>
+ 2228866102U, // <2,4,3,2>: Cost 3 vmrghw <2,3,4,5>, <4,2,5,3>
+ 2636810580U, // <2,4,3,3>: Cost 3 vsldoi4 <3,2,4,3>, <3,2,4,3>
+ 1256574160U, // <2,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1213441742U, // <2,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2228866430U, // <2,4,3,6>: Cost 3 vmrghw <2,3,4,5>, <4,6,5,7>
+ 2660701368U, // <2,4,3,7>: Cost 3 vsldoi4 <7,2,4,3>, <7,2,4,3>
+ 1213441745U, // <2,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3704586342U, // <2,4,4,0>: Cost 4 vsldoi4 <2,2,4,4>, LHS
+ 3782831051U, // <2,4,4,1>: Cost 4 vsldoi8 <4,1,2,4>, <4,1,2,4>
+ 3704587900U, // <2,4,4,2>: Cost 4 vsldoi4 <2,2,4,4>, <2,2,4,4>
+ 3368896123U, // <2,4,4,3>: Cost 4 vmrglw <2,2,2,4>, <2,2,4,3>
+ 2793360592U, // <2,4,4,4>: Cost 3 vsldoi12 <7,0,1,2>, <4,4,4,4>
+ 2697809206U, // <2,4,4,5>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 3303198078U, // <2,4,4,6>: Cost 4 vmrghw <2,4,3,5>, <4,6,5,7>
+ 3867102444U, // <2,4,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,4,7,5>
+ 2697809449U, // <2,4,4,u>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 2630852710U, // <2,4,5,0>: Cost 3 vsldoi4 <2,2,4,5>, LHS
+ 2624881572U, // <2,4,5,1>: Cost 3 vsldoi4 <1,2,4,5>, <1,2,4,5>
+ 2630854269U, // <2,4,5,2>: Cost 3 vsldoi4 <2,2,4,5>, <2,2,4,5>
+ 2666686677U, // <2,4,5,3>: Cost 3 vsldoi4 <u,2,4,5>, <3,0,u,2>
+ 2630855990U, // <2,4,5,4>: Cost 3 vsldoi4 <2,2,4,5>, RHS
+ 2230127926U, // <2,4,5,5>: Cost 3 vmrghw <2,5,3,6>, RHS
+ 1691159862U, // <2,4,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 3867102520U, // <2,4,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,5,7,0>
+ 1691159880U, // <2,4,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230799250U, // <2,4,6,0>: Cost 3 vmrghw <2,6,3,7>, <4,0,5,1>
+ 3304541130U, // <2,4,6,1>: Cost 4 vmrghw <2,6,3,7>, <4,1,2,3>
+ 2230799417U, // <2,4,6,2>: Cost 3 vmrghw <2,6,3,7>, <4,2,5,6>
+ 3304541323U, // <2,4,6,3>: Cost 4 vmrghw <2,6,3,7>, <4,3,5,7>
+ 2230799568U, // <2,4,6,4>: Cost 3 vmrghw <2,6,3,7>, <4,4,4,4>
+ 1157057846U, // <2,4,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3304541566U, // <2,4,6,6>: Cost 4 vmrghw <2,6,3,7>, <4,6,5,7>
+ 3798758243U, // <2,4,6,7>: Cost 4 vsldoi8 <6,7,2,4>, <6,7,2,4>
+ 1157058089U, // <2,4,6,u>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3806721018U, // <2,4,7,0>: Cost 4 vsldoi8 <u,1,2,4>, <7,0,1,2>
+ 3853831590U, // <2,4,7,1>: Cost 4 vsldoi12 <4,7,1,2>, <4,7,1,2>
+ 3801412775U, // <2,4,7,2>: Cost 4 vsldoi8 <7,2,2,4>, <7,2,2,4>
+ 3802076408U, // <2,4,7,3>: Cost 4 vsldoi8 <7,3,2,4>, <7,3,2,4>
+ 3401436368U, // <2,4,7,4>: Cost 4 vmrglw <7,6,2,7>, <4,4,4,4>
+ 2793360840U, // <2,4,7,5>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,5,0>
+ 3804067307U, // <2,4,7,6>: Cost 4 vsldoi8 <7,6,2,4>, <7,6,2,4>
+ 3867102682U, // <2,4,7,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,7,7,0>
+ 2793360867U, // <2,4,7,u>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,u,0>
+ 2630877286U, // <2,4,u,0>: Cost 3 vsldoi4 <2,2,4,u>, LHS
+ 2282580144U, // <2,4,u,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2630878848U, // <2,4,u,2>: Cost 3 vsldoi4 <2,2,4,u>, <2,2,4,u>
+ 2636851545U, // <2,4,u,3>: Cost 3 vsldoi4 <3,2,4,u>, <3,2,4,u>
+ 1256615120U, // <2,4,u,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1208837838U, // <2,4,u,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 1691160105U, // <2,4,u,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2660742333U, // <2,4,u,7>: Cost 3 vsldoi4 <7,2,4,u>, <7,2,4,u>
+ 1208837841U, // <2,4,u,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3766910976U, // <2,5,0,0>: Cost 4 vsldoi8 <1,4,2,5>, <0,0,0,0>
+ 2693169254U, // <2,5,0,1>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3760939181U, // <2,5,0,2>: Cost 4 vsldoi8 <0,4,2,5>, <0,2,1,2>
+ 3843214936U, // <2,5,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <5,0,3,0>
+ 3760939355U, // <2,5,0,4>: Cost 4 vsldoi8 <0,4,2,5>, <0,4,2,5>
+ 3867102827U, // <2,5,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,5,1>
+ 3867102836U, // <2,5,0,6>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,6,1>
+ 3867102844U, // <2,5,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,7,0>
+ 2693169821U, // <2,5,0,u>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3766911724U, // <2,5,1,0>: Cost 4 vsldoi8 <1,4,2,5>, <1,0,2,1>
+ 3766911796U, // <2,5,1,1>: Cost 4 vsldoi8 <1,4,2,5>, <1,1,1,1>
+ 2693170070U, // <2,5,1,2>: Cost 3 vsldoi8 <1,4,2,5>, <1,2,3,0>
+ 3384798262U, // <2,5,1,3>: Cost 4 vmrglw <4,u,2,1>, <4,2,5,3>
+ 2693170228U, // <2,5,1,4>: Cost 3 vsldoi8 <1,4,2,5>, <1,4,2,5>
+ 3301208068U, // <2,5,1,5>: Cost 4 vmrghw <2,1,3,5>, <5,5,5,5>
+ 3366879607U, // <2,5,1,6>: Cost 4 vmrglw <1,u,2,1>, <0,4,5,6>
+ 3867102925U, // <2,5,1,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,1,7,0>
+ 2695824760U, // <2,5,1,u>: Cost 3 vsldoi8 <1,u,2,5>, <1,u,2,5>
+ 2642845798U, // <2,5,2,0>: Cost 3 vsldoi4 <4,2,5,2>, LHS
+ 2295139218U, // <2,5,2,1>: Cost 3 vmrglw <2,2,2,2>, <4,0,5,1>
+ 2699142760U, // <2,5,2,2>: Cost 3 vsldoi8 <2,4,2,5>, <2,2,2,2>
+ 3766912678U, // <2,5,2,3>: Cost 4 vsldoi8 <1,4,2,5>, <2,3,0,1>
+ 2699142925U, // <2,5,2,4>: Cost 3 vsldoi8 <2,4,2,5>, <2,4,2,5>
+ 2228031492U, // <2,5,2,5>: Cost 3 vmrghw <2,2,2,2>, <5,5,5,5>
+ 2295138818U, // <2,5,2,6>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,6>
+ 3368879347U, // <2,5,2,7>: Cost 4 vmrglw <2,2,2,2>, <1,6,5,7>
+ 2295138820U, // <2,5,2,u>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,u>
+ 2287184866U, // <2,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256573842U, // <2,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642855630U, // <2,5,3,2>: Cost 3 vsldoi4 <4,2,5,3>, <2,3,4,5>
+ 2287182763U, // <2,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287184870U, // <2,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256574170U, // <2,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1213442562U, // <2,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287183091U, // <2,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1213442564U, // <2,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3716604006U, // <2,5,4,0>: Cost 4 vsldoi4 <4,2,5,4>, LHS
+ 3716604822U, // <2,5,4,1>: Cost 4 vsldoi4 <4,2,5,4>, <1,2,3,0>
+ 3766914099U, // <2,5,4,2>: Cost 4 vsldoi8 <1,4,2,5>, <4,2,5,0>
+ 3368895403U, // <2,5,4,3>: Cost 5 vmrglw <2,2,2,4>, <1,2,5,3>
+ 3716607031U, // <2,5,4,4>: Cost 4 vsldoi4 <4,2,5,4>, <4,2,5,4>
+ 2693172534U, // <2,5,4,5>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3363588610U, // <2,5,4,6>: Cost 4 vmrglw <1,3,2,4>, <3,4,5,6>
+ 3368895731U, // <2,5,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,5,7>
+ 2693172777U, // <2,5,4,u>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3704668262U, // <2,5,5,0>: Cost 4 vsldoi4 <2,2,5,5>, LHS
+ 3704669078U, // <2,5,5,1>: Cost 4 vsldoi4 <2,2,5,5>, <1,2,3,0>
+ 3704669830U, // <2,5,5,2>: Cost 4 vsldoi4 <2,2,5,5>, <2,2,5,5>
+ 3364259460U, // <2,5,5,3>: Cost 4 vmrglw <1,4,2,5>, <2,2,5,3>
+ 3704671542U, // <2,5,5,4>: Cost 4 vsldoi4 <2,2,5,5>, RHS
+ 2793361412U, // <2,5,5,5>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 3364258167U, // <2,5,5,6>: Cost 4 vmrglw <1,4,2,5>, <0,4,5,6>
+ 3867103249U, // <2,5,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,5,7,0>
+ 2793361412U, // <2,5,5,u>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 2642878566U, // <2,5,6,0>: Cost 3 vsldoi4 <4,2,5,6>, LHS
+ 3386166810U, // <2,5,6,1>: Cost 4 vmrglw <5,1,2,6>, <4,u,5,1>
+ 2723033594U, // <2,5,6,2>: Cost 3 vsldoi8 <6,4,2,5>, <6,2,7,3>
+ 3848523842U, // <2,5,6,3>: Cost 4 vsldoi12 <3,u,1,2>, <5,6,3,4>
+ 2723033713U, // <2,5,6,4>: Cost 3 vsldoi8 <6,4,2,5>, <6,4,2,5>
+ 2230800388U, // <2,5,6,5>: Cost 3 vmrghw <2,6,3,7>, <5,5,5,5>
+ 2230800482U, // <2,5,6,6>: Cost 3 vmrghw <2,6,3,7>, <5,6,7,0>
+ 2785841252U, // <2,5,6,7>: Cost 3 vsldoi12 <5,6,7,2>, <5,6,7,2>
+ 2785914989U, // <2,5,6,u>: Cost 3 vsldoi12 <5,6,u,2>, <5,6,u,2>
+ 3796775930U, // <2,5,7,0>: Cost 4 vsldoi8 <6,4,2,5>, <7,0,1,2>
+ 3800757335U, // <2,5,7,1>: Cost 4 vsldoi8 <7,1,2,5>, <7,1,2,5>
+ 3853463689U, // <2,5,7,2>: Cost 4 vsldoi12 <4,6,5,2>, <5,7,2,3>
+ 3796776218U, // <2,5,7,3>: Cost 4 vsldoi8 <6,4,2,5>, <7,3,6,2>
+ 3796776294U, // <2,5,7,4>: Cost 4 vsldoi8 <6,4,2,5>, <7,4,5,6>
+ 3803411867U, // <2,5,7,5>: Cost 4 vsldoi8 <7,5,2,5>, <7,5,2,5>
+ 3371575081U, // <2,5,7,6>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,6>
+ 3796776516U, // <2,5,7,7>: Cost 4 vsldoi8 <6,4,2,5>, <7,7,3,3>
+ 3371575083U, // <2,5,7,u>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,u>
+ 2287225826U, // <2,5,u,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256614802U, // <2,5,u,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642896590U, // <2,5,u,2>: Cost 3 vsldoi4 <4,2,5,u>, <2,3,4,5>
+ 2287223723U, // <2,5,u,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287225830U, // <2,5,u,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256615130U, // <2,5,u,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1208838658U, // <2,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287224051U, // <2,5,u,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1208838660U, // <2,5,u,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3772227584U, // <2,6,0,0>: Cost 4 vsldoi8 <2,3,2,6>, <0,0,0,0>
+ 2698485862U, // <2,6,0,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3759620282U, // <2,6,0,2>: Cost 4 vsldoi8 <0,2,2,6>, <0,2,2,6>
+ 3710675299U, // <2,6,0,3>: Cost 4 vsldoi4 <3,2,6,0>, <3,2,6,0>
+ 3767583058U, // <2,6,0,4>: Cost 4 vsldoi8 <1,5,2,6>, <0,4,1,5>
+ 3378153265U, // <2,6,0,5>: Cost 5 vmrglw <3,7,2,0>, <2,4,6,5>
+ 3865186637U, // <2,6,0,6>: Cost 4 vsldoi12 <6,6,2,2>, <6,0,6,1>
+ 2330291510U, // <2,6,0,7>: Cost 3 vmrglw <u,1,2,0>, RHS
+ 2698486429U, // <2,6,0,u>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3734569062U, // <2,6,1,0>: Cost 4 vsldoi4 <7,2,6,1>, LHS
+ 3764929346U, // <2,6,1,1>: Cost 4 vsldoi8 <1,1,2,6>, <1,1,2,6>
+ 3772228502U, // <2,6,1,2>: Cost 4 vsldoi8 <2,3,2,6>, <1,2,3,0>
+ 3734571158U, // <2,6,1,3>: Cost 4 vsldoi4 <7,2,6,1>, <3,0,1,2>
+ 3734572342U, // <2,6,1,4>: Cost 4 vsldoi4 <7,2,6,1>, RHS
+ 3767583878U, // <2,6,1,5>: Cost 4 vsldoi8 <1,5,2,6>, <1,5,2,6>
+ 3768247511U, // <2,6,1,6>: Cost 4 vsldoi8 <1,6,2,6>, <1,6,2,6>
+ 2293140790U, // <2,6,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 2293140791U, // <2,6,1,u>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 3704717414U, // <2,6,2,0>: Cost 4 vsldoi4 <2,2,6,2>, LHS
+ 3395424589U, // <2,6,2,1>: Cost 4 vmrglw <6,6,2,2>, <6,0,6,1>
+ 2228031993U, // <2,6,2,2>: Cost 3 vmrghw <2,2,2,2>, <6,2,7,2>
+ 2698487485U, // <2,6,2,3>: Cost 3 vsldoi8 <2,3,2,6>, <2,3,2,6>
+ 3704720694U, // <2,6,2,4>: Cost 4 vsldoi4 <2,2,6,2>, RHS
+ 3773556575U, // <2,6,2,5>: Cost 4 vsldoi8 <2,5,2,6>, <2,5,2,6>
+ 2698487738U, // <2,6,2,6>: Cost 3 vsldoi8 <2,3,2,6>, <2,6,3,7>
+ 1221397814U, // <2,6,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 1221397815U, // <2,6,2,u>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 2636955750U, // <2,6,3,0>: Cost 3 vsldoi4 <3,2,6,3>, LHS
+ 2330314217U, // <2,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2636957626U, // <2,6,3,2>: Cost 3 vsldoi4 <3,2,6,3>, <2,6,3,7>
+ 2287184230U, // <2,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636959030U, // <2,6,3,4>: Cost 3 vsldoi4 <3,2,6,3>, RHS
+ 2648903448U, // <2,6,3,5>: Cost 3 vsldoi4 <5,2,6,3>, <5,2,6,3>
+ 1256575800U, // <2,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135056694U, // <2,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135056695U, // <2,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 3710705766U, // <2,6,4,0>: Cost 4 vsldoi4 <3,2,6,4>, LHS
+ 3698762677U, // <2,6,4,1>: Cost 5 vsldoi4 <1,2,6,4>, <1,2,6,4>
+ 3710707389U, // <2,6,4,2>: Cost 4 vsldoi4 <3,2,6,4>, <2,3,2,6>
+ 3710708071U, // <2,6,4,3>: Cost 4 vsldoi4 <3,2,6,4>, <3,2,6,4>
+ 3710709046U, // <2,6,4,4>: Cost 4 vsldoi4 <3,2,6,4>, RHS
+ 2698489142U, // <2,6,4,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 3796782457U, // <2,6,4,6>: Cost 4 vsldoi8 <6,4,2,6>, <4,6,5,2>
+ 2295156022U, // <2,6,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 2295156023U, // <2,6,4,u>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 3303870753U, // <2,6,5,0>: Cost 4 vmrghw <2,5,3,6>, <6,0,1,2>
+ 3788820134U, // <2,6,5,1>: Cost 4 vsldoi8 <5,1,2,6>, <5,1,2,6>
+ 3779530520U, // <2,6,5,2>: Cost 4 vsldoi8 <3,5,2,6>, <5,2,6,3>
+ 3303871026U, // <2,6,5,3>: Cost 4 vmrghw <2,5,3,6>, <6,3,4,5>
+ 3303871117U, // <2,6,5,4>: Cost 4 vmrghw <2,5,3,6>, <6,4,5,6>
+ 3791474666U, // <2,6,5,5>: Cost 4 vsldoi8 <5,5,2,6>, <5,5,2,6>
+ 3792138299U, // <2,6,5,6>: Cost 4 vsldoi8 <5,6,2,6>, <5,6,2,6>
+ 2290519350U, // <2,6,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2290519351U, // <2,6,5,u>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2631008358U, // <2,6,6,0>: Cost 3 vsldoi4 <2,2,6,6>, LHS
+ 3372893673U, // <2,6,6,1>: Cost 4 vmrglw <2,u,2,6>, <2,0,6,1>
+ 2791445264U, // <2,6,6,2>: Cost 3 vsldoi12 <6,6,2,2>, <6,6,2,2>
+ 2230800968U, // <2,6,6,3>: Cost 3 vmrghw <2,6,3,7>, <6,3,7,0>
+ 2631011638U, // <2,6,6,4>: Cost 3 vsldoi4 <2,2,6,6>, RHS
+ 3372894001U, // <2,6,6,5>: Cost 4 vmrglw <2,u,2,6>, <2,4,6,5>
+ 2793362232U, // <2,6,6,6>: Cost 3 vsldoi12 <7,0,1,2>, <6,6,6,6>
+ 2295835958U, // <2,6,6,7>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2295835959U, // <2,6,6,u>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2793362254U, // <2,6,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,0,1>
+ 2792035160U, // <2,6,7,1>: Cost 3 vsldoi12 <6,7,1,2>, <6,7,1,2>
+ 2792108897U, // <2,6,7,2>: Cost 3 vsldoi12 <6,7,2,2>, <6,7,2,2>
+ 2769474408U, // <2,6,7,3>: Cost 3 vsldoi12 <3,0,1,2>, <6,7,3,0>
+ 2793362294U, // <2,6,7,4>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,4,5>
+ 3371575089U, // <2,6,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,6,5>
+ 2792403845U, // <2,6,7,6>: Cost 3 vsldoi12 <6,7,6,2>, <6,7,6,2>
+ 2297834806U, // <2,6,7,7>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2297834807U, // <2,6,7,u>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2636996710U, // <2,6,u,0>: Cost 3 vsldoi4 <3,2,6,u>, LHS
+ 2698491694U, // <2,6,u,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 2636998631U, // <2,6,u,2>: Cost 3 vsldoi4 <3,2,6,u>, <2,6,u,7>
+ 2282580326U, // <2,6,u,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636999990U, // <2,6,u,4>: Cost 3 vsldoi4 <3,2,6,u>, RHS
+ 2698492058U, // <2,6,u,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 1256616760U, // <2,6,u,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135097654U, // <2,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135097655U, // <2,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 2666864742U, // <2,7,0,0>: Cost 3 vsldoi4 <u,2,7,0>, LHS
+ 1719620602U, // <2,7,0,1>: Cost 2 vsldoi12 <7,0,1,2>, <7,0,1,2>
+ 3768254637U, // <2,7,0,2>: Cost 4 vsldoi8 <1,6,2,7>, <0,2,1,2>
+ 3393417722U, // <2,7,0,3>: Cost 4 vmrglw <6,3,2,0>, <6,2,7,3>
+ 2666868022U, // <2,7,0,4>: Cost 3 vsldoi4 <u,2,7,0>, RHS
+ 3867104290U, // <2,7,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,0,5,6>
+ 3728667127U, // <2,7,0,6>: Cost 4 vsldoi4 <6,2,7,0>, <6,2,7,0>
+ 2666869817U, // <2,7,0,7>: Cost 3 vsldoi4 <u,2,7,0>, <7,0,u,2>
+ 1720136761U, // <2,7,0,u>: Cost 2 vsldoi12 <7,0,u,2>, <7,0,u,2>
+ 3728670822U, // <2,7,1,0>: Cost 4 vsldoi4 <6,2,7,1>, LHS
+ 3774227252U, // <2,7,1,1>: Cost 4 vsldoi8 <2,6,2,7>, <1,1,1,1>
+ 3774227350U, // <2,7,1,2>: Cost 4 vsldoi8 <2,6,2,7>, <1,2,3,0>
+ 2323001850U, // <2,7,1,3>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 3728674102U, // <2,7,1,4>: Cost 4 vsldoi4 <6,2,7,1>, RHS
+ 3774227567U, // <2,7,1,5>: Cost 5 vsldoi8 <2,6,2,7>, <1,5,0,1>
+ 2694513880U, // <2,7,1,6>: Cost 3 vsldoi8 <1,6,2,7>, <1,6,2,7>
+ 3396744002U, // <2,7,1,7>: Cost 4 vmrglw <6,u,2,1>, <6,6,7,7>
+ 2323001850U, // <2,7,1,u>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 2654937190U, // <2,7,2,0>: Cost 3 vsldoi4 <6,2,7,2>, LHS
+ 3728679732U, // <2,7,2,1>: Cost 4 vsldoi4 <6,2,7,2>, <1,1,1,1>
+ 2700486248U, // <2,7,2,2>: Cost 3 vsldoi8 <2,6,2,7>, <2,2,2,2>
+ 2321682938U, // <2,7,2,3>: Cost 3 vmrglw <6,6,2,2>, <6,2,7,3>
+ 2654940470U, // <2,7,2,4>: Cost 3 vsldoi4 <6,2,7,2>, RHS
+ 3859584196U, // <2,7,2,5>: Cost 4 vsldoi12 <5,6,7,2>, <7,2,5,6>
+ 2700486577U, // <2,7,2,6>: Cost 3 vsldoi8 <2,6,2,7>, <2,6,2,7>
+ 2228033132U, // <2,7,2,7>: Cost 3 vmrghw <2,2,2,2>, <7,7,7,7>
+ 2701813843U, // <2,7,2,u>: Cost 3 vsldoi8 <2,u,2,7>, <2,u,2,7>
+ 1581203558U, // <2,7,3,0>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 2654946100U, // <2,7,3,1>: Cost 3 vsldoi4 <6,2,7,3>, <1,1,1,1>
+ 2637031354U, // <2,7,3,2>: Cost 3 vsldoi4 <3,2,7,3>, <2,6,3,7>
+ 1256575482U, // <2,7,3,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581206838U, // <2,7,3,4>: Cost 2 vsldoi4 <6,2,7,3>, RHS
+ 2654949380U, // <2,7,3,5>: Cost 3 vsldoi4 <6,2,7,3>, <5,5,5,5>
+ 1581208058U, // <2,7,3,6>: Cost 2 vsldoi4 <6,2,7,3>, <6,2,7,3>
+ 1256575810U, // <2,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581209390U, // <2,7,3,u>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 3728695398U, // <2,7,4,0>: Cost 4 vsldoi4 <6,2,7,4>, LHS
+ 3869758782U, // <2,7,4,1>: Cost 4 vsldoi12 <7,4,1,2>, <7,4,1,2>
+ 3728696936U, // <2,7,4,2>: Cost 4 vsldoi4 <6,2,7,4>, <2,2,2,2>
+ 3393450490U, // <2,7,4,3>: Cost 4 vmrglw <6,3,2,4>, <6,2,7,3>
+ 3728698678U, // <2,7,4,4>: Cost 4 vsldoi4 <6,2,7,4>, RHS
+ 2700487990U, // <2,7,4,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3728699899U, // <2,7,4,6>: Cost 4 vsldoi4 <6,2,7,4>, <6,2,7,4>
+ 3867104626U, // <2,7,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <7,4,7,0>
+ 2700488233U, // <2,7,4,u>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3855160709U, // <2,7,5,0>: Cost 4 vsldoi12 <5,0,1,2>, <7,5,0,1>
+ 3728704406U, // <2,7,5,1>: Cost 4 vsldoi4 <6,2,7,5>, <1,2,3,0>
+ 3370233956U, // <2,7,5,2>: Cost 4 vmrglw <2,4,2,5>, <5,6,7,2>
+ 2320380410U, // <2,7,5,3>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 3728706870U, // <2,7,5,4>: Cost 4 vsldoi4 <6,2,7,5>, RHS
+ 3867104694U, // <2,7,5,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,5,5,5>
+ 3792146492U, // <2,7,5,6>: Cost 4 vsldoi8 <5,6,2,7>, <5,6,2,7>
+ 3394122562U, // <2,7,5,7>: Cost 4 vmrglw <6,4,2,5>, <6,6,7,7>
+ 2320380410U, // <2,7,5,u>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 2230801402U, // <2,7,6,0>: Cost 3 vmrghw <2,6,3,7>, <7,0,1,2>
+ 3768258984U, // <2,7,6,1>: Cost 4 vsldoi8 <1,6,2,7>, <6,1,7,2>
+ 2730349050U, // <2,7,6,2>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 3372894575U, // <2,7,6,3>: Cost 4 vmrglw <2,u,2,6>, <3,2,7,3>
+ 2230801766U, // <2,7,6,4>: Cost 3 vmrghw <2,6,3,7>, <7,4,5,6>
+ 3304543670U, // <2,7,6,5>: Cost 4 vmrghw <2,6,3,7>, <7,5,5,5>
+ 3728716285U, // <2,7,6,6>: Cost 4 vsldoi4 <6,2,7,6>, <6,2,7,6>
+ 2230802028U, // <2,7,6,7>: Cost 3 vmrghw <2,6,3,7>, <7,7,7,7>
+ 2730349050U, // <2,7,6,u>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 2793362983U, // <2,7,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,0,1>
+ 3728721112U, // <2,7,7,1>: Cost 4 vsldoi4 <6,2,7,7>, <1,6,2,7>
+ 3371574933U, // <2,7,7,2>: Cost 4 vmrglw <2,6,2,7>, <2,2,7,2>
+ 2327695866U, // <2,7,7,3>: Cost 3 vmrglw <7,6,2,7>, <6,2,7,3>
+ 3728723254U, // <2,7,7,4>: Cost 4 vsldoi4 <6,2,7,7>, RHS
+ 3371574855U, // <2,7,7,5>: Cost 5 vmrglw <2,6,2,7>, <2,1,7,5>
+ 2730350062U, // <2,7,7,6>: Cost 3 vsldoi8 <7,6,2,7>, <7,6,2,7>
+ 2793363052U, // <2,7,7,7>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,7,7>
+ 2798671471U, // <2,7,7,u>: Cost 3 vsldoi12 <7,u,1,2>, <7,7,u,1>
+ 1581244518U, // <2,7,u,0>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1724929666U, // <2,7,u,1>: Cost 2 vsldoi12 <7,u,1,2>, <7,u,1,2>
+ 2637072314U, // <2,7,u,2>: Cost 3 vsldoi4 <3,2,7,u>, <2,6,3,7>
+ 1256616442U, // <2,7,u,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581247798U, // <2,7,u,4>: Cost 2 vsldoi4 <6,2,7,u>, RHS
+ 2700490906U, // <2,7,u,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 1581249023U, // <2,7,u,6>: Cost 2 vsldoi4 <6,2,7,u>, <6,2,7,u>
+ 1256616770U, // <2,7,u,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581250350U, // <2,7,u,u>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1611489280U, // <2,u,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537747563U, // <2,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685231277U, // <2,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685231356U, // <2,u,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611489618U, // <2,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2226763930U, // <2,u,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 2733007350U, // <2,u,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660971737U, // <2,u,0,7>: Cost 3 vsldoi4 <7,2,u,0>, <7,2,u,0>
+ 537748125U, // <2,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689876708U, // <2,u,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611490100U, // <2,u,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611490198U, // <2,u,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 2293137564U, // <2,u,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 2689877072U, // <2,u,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689877103U, // <2,u,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689877199U, // <2,u,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2293140808U, // <2,u,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 1616135548U, // <2,u,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 1556938854U, // <2,u,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1154291502U, // <2,u,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 336380006U, // <2,u,2,2>: Cost 1 vspltisw2 LHS
+ 1611490982U, // <2,u,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 1556942134U, // <2,u,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1154291866U, // <2,u,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 1611491258U, // <2,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1221397832U, // <2,u,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 336380006U, // <2,u,2,u>: Cost 1 vspltisw2 LHS
+ 1611491478U, // <2,u,3,0>: Cost 2 vsldoi8 LHS, <3,0,1,2>
+ 1213440073U, // <2,u,3,1>: Cost 2 vmrglw LHS, <0,0,u,1>
+ 1213442261U, // <2,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135053468U, // <2,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 1611491842U, // <2,u,3,4>: Cost 2 vsldoi8 LHS, <3,4,5,6>
+ 1213440401U, // <2,u,3,5>: Cost 2 vmrglw LHS, <0,4,u,5>
+ 1213442589U, // <2,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135056712U, // <2,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135053473U, // <2,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1551425638U, // <2,u,4,0>: Cost 2 vsldoi4 <1,2,u,4>, LHS
+ 1551426503U, // <2,u,4,1>: Cost 2 vsldoi4 <1,2,u,4>, <1,2,u,4>
+ 2625169000U, // <2,u,4,2>: Cost 3 vsldoi4 <1,2,u,4>, <2,2,2,2>
+ 2625169558U, // <2,u,4,3>: Cost 3 vsldoi4 <1,2,u,4>, <3,0,1,2>
+ 1551428918U, // <2,u,4,4>: Cost 2 vsldoi4 <1,2,u,4>, RHS
+ 537750838U, // <2,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2733010297U, // <2,u,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2295156040U, // <2,u,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 537751081U, // <2,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689879624U, // <2,u,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2230130478U, // <2,u,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2631149217U, // <2,u,5,2>: Cost 3 vsldoi4 <2,2,u,5>, <2,2,u,5>
+ 2290516124U, // <2,u,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2689879988U, // <2,u,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659269124U, // <2,u,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1691162778U, // <2,u,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2290519368U, // <2,u,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 1691162796U, // <2,u,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230802131U, // <2,u,6,0>: Cost 3 vmrghw <2,6,3,7>, <u,0,1,2>
+ 1157060398U, // <2,u,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659269626U, // <2,u,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2764904656U, // <2,u,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <u,6,3,7>
+ 2230802495U, // <2,u,6,4>: Cost 3 vmrghw <2,6,3,7>, <u,4,5,6>
+ 1157060762U, // <2,u,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 1659269944U, // <2,u,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659269966U, // <2,u,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1157060965U, // <2,u,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659270138U, // <2,u,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2727040090U, // <2,u,7,1>: Cost 3 vsldoi8 <7,1,2,u>, <7,1,2,u>
+ 2727703723U, // <2,u,7,2>: Cost 3 vsldoi8 <7,2,2,u>, <7,2,2,u>
+ 2297831580U, // <2,u,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1659270502U, // <2,u,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2733012406U, // <2,u,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2730358255U, // <2,u,7,6>: Cost 3 vsldoi8 <7,6,2,u>, <7,6,2,u>
+ 1659270764U, // <2,u,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659270786U, // <2,u,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1213481923U, // <2,u,u,0>: Cost 2 vmrglw LHS, <1,2,u,0>
+ 537753390U, // <2,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 336380006U, // <2,u,u,2>: Cost 1 vspltisw2 LHS
+ 135094428U, // <2,u,u,3>: Cost 1 vmrglw LHS, LHS
+ 1213481927U, // <2,u,u,4>: Cost 2 vmrglw LHS, <1,2,u,4>
+ 537753754U, // <2,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1208838685U, // <2,u,u,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135097672U, // <2,u,u,7>: Cost 1 vmrglw LHS, RHS
+ 135094433U, // <2,u,u,u>: Cost 1 vmrglw LHS, LHS
+ 1678557184U, // <3,0,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1678557194U, // <3,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2631181989U, // <3,0,0,2>: Cost 3 vsldoi4 <2,3,0,0>, <2,3,0,0>
+ 2289223984U, // <3,0,0,3>: Cost 3 vmrglw <1,2,3,0>, <3,2,0,3>
+ 2756943909U, // <3,0,0,4>: Cost 3 vsldoi12 LHS, <0,0,4,1>
+ 3362965729U, // <3,0,0,5>: Cost 4 vmrglw <1,2,3,0>, <3,1,0,5>
+ 3362966054U, // <3,0,0,6>: Cost 4 vmrglw <1,2,3,0>, <3,5,0,6>
+ 2289224312U, // <3,0,0,7>: Cost 3 vmrglw <1,2,3,0>, <3,6,0,7>
+ 1683202121U, // <3,0,0,u>: Cost 2 vsldoi12 LHS, <0,0,u,1>
+ 1557446758U, // <3,0,1,0>: Cost 2 vsldoi4 <2,3,0,1>, LHS
+ 2752741467U, // <3,0,1,1>: Cost 3 vsldoi12 LHS, <0,1,1,1>
+ 604815462U, // <3,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2631190676U, // <3,0,1,3>: Cost 3 vsldoi4 <2,3,0,1>, <3,0,1,0>
+ 1557450038U, // <3,0,1,4>: Cost 2 vsldoi4 <2,3,0,1>, RHS
+ 2667024388U, // <3,0,1,5>: Cost 3 vsldoi4 <u,3,0,1>, <5,5,5,5>
+ 2800074894U, // <3,0,1,6>: Cost 3 vsldoi12 LHS, <0,1,6,7>
+ 2661053667U, // <3,0,1,7>: Cost 3 vsldoi4 <7,3,0,1>, <7,3,0,1>
+ 604815516U, // <3,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696521165U, // <3,0,2,0>: Cost 3 vsldoi8 <2,0,3,0>, <2,0,3,0>
+ 2752741549U, // <3,0,2,1>: Cost 3 vsldoi12 LHS, <0,2,1,2>
+ 2691876456U, // <3,0,2,2>: Cost 3 vsldoi8 <1,2,3,0>, <2,2,2,2>
+ 2691876518U, // <3,0,2,3>: Cost 3 vsldoi8 <1,2,3,0>, <2,3,0,1>
+ 3830685895U, // <3,0,2,4>: Cost 4 vsldoi12 LHS, <0,2,4,1>
+ 3765618536U, // <3,0,2,5>: Cost 4 vsldoi8 <1,2,3,0>, <2,5,3,6>
+ 2691876794U, // <3,0,2,6>: Cost 3 vsldoi8 <1,2,3,0>, <2,6,3,7>
+ 2701166596U, // <3,0,2,7>: Cost 3 vsldoi8 <2,7,3,0>, <2,7,3,0>
+ 2756944108U, // <3,0,2,u>: Cost 3 vsldoi12 LHS, <0,2,u,2>
+ 2691877014U, // <3,0,3,0>: Cost 3 vsldoi8 <1,2,3,0>, <3,0,1,2>
+ 1161003110U, // <3,0,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2691877168U, // <3,0,3,2>: Cost 3 vsldoi8 <1,2,3,0>, <3,2,0,3>
+ 2691877246U, // <3,0,3,3>: Cost 3 vsldoi8 <1,2,3,0>, <3,3,0,0>
+ 2691877378U, // <3,0,3,4>: Cost 3 vsldoi8 <1,2,3,0>, <3,4,5,6>
+ 3765619238U, // <3,0,3,5>: Cost 4 vsldoi8 <1,2,3,0>, <3,5,0,6>
+ 2691877496U, // <3,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 3368962680U, // <3,0,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,0,7>
+ 1161003677U, // <3,0,3,u>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2289254400U, // <3,0,4,0>: Cost 3 vmrglw <1,2,3,4>, <0,0,0,0>
+ 1678557522U, // <3,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2631214761U, // <3,0,4,2>: Cost 3 vsldoi4 <2,3,0,4>, <2,3,0,4>
+ 2235580672U, // <3,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 2756944237U, // <3,0,4,4>: Cost 3 vsldoi12 LHS, <0,4,4,5>
+ 1618136374U, // <3,0,4,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 3309322742U, // <3,0,4,6>: Cost 4 vmrghw <3,4,5,6>, <0,6,1,7>
+ 3362998904U, // <3,0,4,7>: Cost 4 vmrglw <1,2,3,4>, <3,6,0,7>
+ 1683202449U, // <3,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 3765620296U, // <3,0,5,0>: Cost 4 vsldoi8 <1,2,3,0>, <5,0,1,2>
+ 2752299427U, // <3,0,5,1>: Cost 3 vsldoi12 LHS, <0,5,1,5>
+ 3789508346U, // <3,0,5,2>: Cost 4 vsldoi8 <5,2,3,0>, <5,2,3,0>
+ 3403486842U, // <3,0,5,3>: Cost 4 vmrglw <u,0,3,5>, <7,u,0,3>
+ 3765620660U, // <3,0,5,4>: Cost 4 vsldoi8 <1,2,3,0>, <5,4,5,6>
+ 2733682692U, // <3,0,5,5>: Cost 3 vsldoi8 <u,2,3,0>, <5,5,5,5>
+ 2800075218U, // <3,0,5,6>: Cost 3 vsldoi12 LHS, <0,5,6,7>
+ 3873817044U, // <3,0,5,7>: Cost 4 vsldoi12 LHS, <0,5,7,0>
+ 2800075234U, // <3,0,5,u>: Cost 3 vsldoi12 LHS, <0,5,u,5>
+ 2752299501U, // <3,0,6,0>: Cost 3 vsldoi12 LHS, <0,6,0,7>
+ 2236547174U, // <3,0,6,1>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2733683194U, // <3,0,6,2>: Cost 3 vsldoi8 <u,2,3,0>, <6,2,7,3>
+ 3844473352U, // <3,0,6,3>: Cost 4 vsldoi12 <3,2,0,3>, <0,6,3,7>
+ 3310289234U, // <3,0,6,4>: Cost 4 vmrghw <3,6,0,7>, <0,4,1,5>
+ 3873817114U, // <3,0,6,5>: Cost 4 vsldoi12 LHS, <0,6,5,7>
+ 2733683512U, // <3,0,6,6>: Cost 3 vsldoi8 <u,2,3,0>, <6,6,6,6>
+ 2725057384U, // <3,0,6,7>: Cost 3 vsldoi8 <6,7,3,0>, <6,7,3,0>
+ 2236547741U, // <3,0,6,u>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2297905152U, // <3,0,7,0>: Cost 3 vmrglw <2,6,3,7>, <0,0,0,0>
+ 2297906854U, // <3,0,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,1>
+ 2727711916U, // <3,0,7,2>: Cost 3 vsldoi8 <7,2,3,0>, <7,2,3,0>
+ 3371649328U, // <3,0,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,0,3>
+ 2733684070U, // <3,0,7,4>: Cost 3 vsldoi8 <u,2,3,0>, <7,4,5,6>
+ 3734843490U, // <3,0,7,5>: Cost 4 vsldoi4 <7,3,0,7>, <5,6,7,0>
+ 3798799895U, // <3,0,7,6>: Cost 4 vsldoi8 <6,7,3,0>, <7,6,7,3>
+ 2733684332U, // <3,0,7,7>: Cost 3 vsldoi8 <u,2,3,0>, <7,7,7,7>
+ 2297906861U, // <3,0,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,u>
+ 1557504102U, // <3,0,u,0>: Cost 2 vsldoi4 <2,3,0,u>, LHS
+ 1678557842U, // <3,0,u,1>: Cost 2 vsldoi12 LHS, <0,u,1,1>
+ 604816029U, // <3,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2691880892U, // <3,0,u,3>: Cost 3 vsldoi8 <1,2,3,0>, <u,3,0,1>
+ 1557507382U, // <3,0,u,4>: Cost 2 vsldoi4 <2,3,0,u>, RHS
+ 1618139290U, // <3,0,u,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 2691881168U, // <3,0,u,6>: Cost 3 vsldoi8 <1,2,3,0>, <u,6,3,7>
+ 2661111018U, // <3,0,u,7>: Cost 3 vsldoi4 <7,3,0,u>, <7,3,0,u>
+ 604816083U, // <3,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2619310332U, // <3,1,0,0>: Cost 3 vsldoi4 <0,3,1,0>, <0,3,1,0>
+ 2756944612U, // <3,1,0,1>: Cost 3 vsldoi12 LHS, <1,0,1,2>
+ 2289221724U, // <3,1,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,1,2>
+ 2619312278U, // <3,1,0,3>: Cost 3 vsldoi4 <0,3,1,0>, <3,0,1,2>
+ 2619313462U, // <3,1,0,4>: Cost 3 vsldoi4 <0,3,1,0>, RHS
+ 2289221970U, // <3,1,0,5>: Cost 3 vmrglw <1,2,3,0>, <0,4,1,5>
+ 2232599768U, // <3,1,0,6>: Cost 3 vmrghw <3,0,1,2>, <1,6,2,7>
+ 3362964687U, // <3,1,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,1,7>
+ 2619316014U, // <3,1,0,u>: Cost 3 vsldoi4 <0,3,1,0>, LHS
+ 2756944683U, // <3,1,1,0>: Cost 3 vsldoi12 LHS, <1,1,0,1>
+ 1678558004U, // <3,1,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2691883927U, // <3,1,1,2>: Cost 3 vsldoi8 <1,2,3,1>, <1,2,3,1>
+ 3826631496U, // <3,1,1,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,1,3,3>
+ 2756944723U, // <3,1,1,4>: Cost 3 vsldoi12 LHS, <1,1,4,5>
+ 2756944732U, // <3,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 3830686561U, // <3,1,1,6>: Cost 4 vsldoi12 LHS, <1,1,6,1>
+ 3734869228U, // <3,1,1,7>: Cost 4 vsldoi4 <7,3,1,1>, <7,3,1,1>
+ 1678558004U, // <3,1,1,u>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2696529358U, // <3,1,2,0>: Cost 3 vsldoi8 <2,0,3,1>, <2,0,3,1>
+ 2756944775U, // <3,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 2294548630U, // <3,1,2,2>: Cost 3 vmrglw <2,1,3,2>, <3,0,1,2>
+ 1678558102U, // <3,1,2,3>: Cost 2 vsldoi12 LHS, <1,2,3,0>
+ 2631273782U, // <3,1,2,4>: Cost 3 vsldoi4 <2,3,1,2>, RHS
+ 2756944811U, // <3,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 3830686644U, // <3,1,2,6>: Cost 4 vsldoi12 LHS, <1,2,6,3>
+ 2800075706U, // <3,1,2,7>: Cost 3 vsldoi12 LHS, <1,2,7,0>
+ 1679000515U, // <3,1,2,u>: Cost 2 vsldoi12 LHS, <1,2,u,0>
+ 2619334911U, // <3,1,3,0>: Cost 3 vsldoi4 <0,3,1,3>, <0,3,1,3>
+ 2295218186U, // <3,1,3,1>: Cost 3 vmrglw <2,2,3,3>, <0,0,1,1>
+ 2293229718U, // <3,1,3,2>: Cost 3 vmrglw <1,u,3,3>, <3,0,1,2>
+ 2619337116U, // <3,1,3,3>: Cost 3 vsldoi4 <0,3,1,3>, <3,3,3,3>
+ 2619338038U, // <3,1,3,4>: Cost 3 vsldoi4 <0,3,1,3>, RHS
+ 2295218514U, // <3,1,3,5>: Cost 3 vmrglw <2,2,3,3>, <0,4,1,5>
+ 3830686729U, // <3,1,3,6>: Cost 4 vsldoi12 LHS, <1,3,6,7>
+ 3368961231U, // <3,1,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,1,7>
+ 2619340590U, // <3,1,3,u>: Cost 3 vsldoi4 <0,3,1,3>, LHS
+ 2619343104U, // <3,1,4,0>: Cost 3 vsldoi4 <0,3,1,4>, <0,3,1,4>
+ 2289254410U, // <3,1,4,1>: Cost 3 vmrglw <1,2,3,4>, <0,0,1,1>
+ 2289256598U, // <3,1,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,1,2>
+ 2619345410U, // <3,1,4,3>: Cost 3 vsldoi4 <0,3,1,4>, <3,4,5,6>
+ 2619346230U, // <3,1,4,4>: Cost 3 vsldoi4 <0,3,1,4>, RHS
+ 2756944976U, // <3,1,4,5>: Cost 3 vsldoi12 LHS, <1,4,5,6>
+ 3362996401U, // <3,1,4,6>: Cost 4 vmrglw <1,2,3,4>, <0,2,1,6>
+ 3362997455U, // <3,1,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,1,7>
+ 2619348782U, // <3,1,4,u>: Cost 3 vsldoi4 <0,3,1,4>, LHS
+ 2756945007U, // <3,1,5,0>: Cost 3 vsldoi12 LHS, <1,5,0,1>
+ 3830686840U, // <3,1,5,1>: Cost 4 vsldoi12 LHS, <1,5,1,1>
+ 3358361750U, // <3,1,5,2>: Cost 4 vmrglw <0,4,3,5>, <3,0,1,2>
+ 3830686857U, // <3,1,5,3>: Cost 4 vsldoi12 LHS, <1,5,3,0>
+ 2756945047U, // <3,1,5,4>: Cost 3 vsldoi12 LHS, <1,5,4,5>
+ 2294571346U, // <3,1,5,5>: Cost 3 vmrglw <2,1,3,5>, <0,4,1,5>
+ 3806105698U, // <3,1,5,6>: Cost 4 vsldoi8 <u,0,3,1>, <5,6,7,0>
+ 3873817774U, // <3,1,5,7>: Cost 4 vsldoi12 LHS, <1,5,7,1>
+ 2756945079U, // <3,1,5,u>: Cost 3 vsldoi12 LHS, <1,5,u,1>
+ 3830686912U, // <3,1,6,0>: Cost 4 vsldoi12 LHS, <1,6,0,1>
+ 2756945103U, // <3,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2236547990U, // <3,1,6,2>: Cost 3 vmrghw <3,6,0,7>, <1,2,3,0>
+ 3826631905U, // <3,1,6,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,6,3,7>
+ 3830686952U, // <3,1,6,4>: Cost 4 vsldoi12 LHS, <1,6,4,5>
+ 2756945139U, // <3,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 3830686972U, // <3,1,6,6>: Cost 4 vsldoi12 LHS, <1,6,6,7>
+ 2800076030U, // <3,1,6,7>: Cost 3 vsldoi12 LHS, <1,6,7,0>
+ 2756945166U, // <3,1,6,u>: Cost 3 vsldoi12 LHS, <1,6,u,7>
+ 3699081318U, // <3,1,7,0>: Cost 4 vsldoi4 <1,3,1,7>, LHS
+ 2297905162U, // <3,1,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,1>
+ 2297907350U, // <3,1,7,2>: Cost 3 vmrglw <2,6,3,7>, <3,0,1,2>
+ 3365675182U, // <3,1,7,3>: Cost 4 vmrglw <1,6,3,7>, <0,2,1,3>
+ 3699084598U, // <3,1,7,4>: Cost 4 vsldoi4 <1,3,1,7>, RHS
+ 2297905490U, // <3,1,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,1,5>
+ 2297905329U, // <3,1,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 3368330447U, // <3,1,7,7>: Cost 4 vmrglw <2,1,3,7>, <1,6,1,7>
+ 2297905169U, // <3,1,7,u>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,u>
+ 2619375876U, // <3,1,u,0>: Cost 3 vsldoi4 <0,3,1,u>, <0,3,1,u>
+ 1678558004U, // <3,1,u,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2289289366U, // <3,1,u,2>: Cost 3 vmrglw <1,2,3,u>, <3,0,1,2>
+ 1679000956U, // <3,1,u,3>: Cost 2 vsldoi12 LHS, <1,u,3,0>
+ 2619378998U, // <3,1,u,4>: Cost 3 vsldoi4 <0,3,1,u>, RHS
+ 2756945297U, // <3,1,u,5>: Cost 3 vsldoi12 LHS, <1,u,5,3>
+ 2297905329U, // <3,1,u,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 2800076192U, // <3,1,u,7>: Cost 3 vsldoi12 LHS, <1,u,7,0>
+ 1683203497U, // <3,1,u,u>: Cost 2 vsldoi12 LHS, <1,u,u,0>
+ 3362964203U, // <3,2,0,0>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,0>
+ 2289222380U, // <3,2,0,1>: Cost 3 vmrglw <1,2,3,0>, <1,0,2,1>
+ 2289222462U, // <3,2,0,2>: Cost 3 vmrglw <1,2,3,0>, <1,1,2,2>
+ 1215479910U, // <3,2,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3362964207U, // <3,2,0,4>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,4>
+ 2289222708U, // <3,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2232600506U, // <3,2,0,6>: Cost 3 vmrghw <3,0,1,2>, <2,6,3,7>
+ 3396142296U, // <3,2,0,7>: Cost 4 vmrglw <6,7,3,0>, <1,6,2,7>
+ 1215479915U, // <3,2,0,u>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3699105894U, // <3,2,1,0>: Cost 4 vsldoi4 <1,3,2,1>, LHS
+ 3765633844U, // <3,2,1,1>: Cost 4 vsldoi8 <1,2,3,2>, <1,1,1,1>
+ 2691892120U, // <3,2,1,2>: Cost 3 vsldoi8 <1,2,3,2>, <1,2,3,2>
+ 2752300575U, // <3,2,1,3>: Cost 3 vsldoi12 LHS, <2,1,3,1>
+ 3699109174U, // <3,2,1,4>: Cost 4 vsldoi4 <1,3,2,1>, RHS
+ 3830687280U, // <3,2,1,5>: Cost 5 vsldoi12 LHS, <2,1,5,0>
+ 3830687289U, // <3,2,1,6>: Cost 4 vsldoi12 LHS, <2,1,6,0>
+ 3874260548U, // <3,2,1,7>: Cost 4 vsldoi12 LHS, <2,1,7,2>
+ 2752742988U, // <3,2,1,u>: Cost 3 vsldoi12 LHS, <2,1,u,1>
+ 2631344230U, // <3,2,2,0>: Cost 3 vsldoi4 <2,3,2,2>, LHS
+ 2697201184U, // <3,2,2,1>: Cost 3 vsldoi8 <2,1,3,2>, <2,1,3,2>
+ 1678558824U, // <3,2,2,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678558834U, // <3,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 2631347510U, // <3,2,2,4>: Cost 3 vsldoi4 <2,3,2,2>, RHS
+ 3368953613U, // <3,2,2,5>: Cost 4 vmrglw <2,2,3,2>, <2,4,2,5>
+ 2234304442U, // <3,2,2,6>: Cost 3 vmrghw <3,2,6,3>, <2,6,3,7>
+ 3368953777U, // <3,2,2,7>: Cost 4 vmrglw <2,2,3,2>, <2,6,2,7>
+ 1679001247U, // <3,2,2,u>: Cost 2 vsldoi12 LHS, <2,2,u,3>
+ 1678558886U, // <3,2,3,0>: Cost 2 vsldoi12 LHS, <2,3,0,1>
+ 2752300719U, // <3,2,3,1>: Cost 3 vsldoi12 LHS, <2,3,1,1>
+ 2752300729U, // <3,2,3,2>: Cost 3 vsldoi12 LHS, <2,3,2,2>
+ 1221476454U, // <3,2,3,3>: Cost 2 vmrglw <2,2,3,3>, LHS
+ 1678558926U, // <3,2,3,4>: Cost 2 vsldoi12 LHS, <2,3,4,5>
+ 2800076503U, // <3,2,3,5>: Cost 3 vsldoi12 LHS, <2,3,5,5>
+ 2234746810U, // <3,2,3,6>: Cost 3 vmrghw <3,3,3,3>, <2,6,3,7>
+ 2800076516U, // <3,2,3,7>: Cost 3 vsldoi12 LHS, <2,3,7,0>
+ 1678558958U, // <3,2,3,u>: Cost 2 vsldoi12 LHS, <2,3,u,1>
+ 3699130470U, // <3,2,4,0>: Cost 4 vsldoi4 <1,3,2,4>, LHS
+ 3362996972U, // <3,2,4,1>: Cost 4 vmrglw <1,2,3,4>, <1,0,2,1>
+ 2289256040U, // <3,2,4,2>: Cost 3 vmrglw <1,2,3,4>, <2,2,2,2>
+ 1215512678U, // <3,2,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3362998676U, // <3,2,4,4>: Cost 4 vmrglw <1,2,3,4>, <3,3,2,4>
+ 2691894582U, // <3,2,4,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2235582394U, // <3,2,4,6>: Cost 3 vmrghw <3,4,5,6>, <2,6,3,7>
+ 3734967544U, // <3,2,4,7>: Cost 4 vsldoi4 <7,3,2,4>, <7,3,2,4>
+ 1215512683U, // <3,2,4,u>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3705110630U, // <3,2,5,0>: Cost 4 vsldoi4 <2,3,2,5>, LHS
+ 3368313985U, // <3,2,5,1>: Cost 4 vmrglw <2,1,3,5>, <1,5,2,1>
+ 3368314472U, // <3,2,5,2>: Cost 4 vmrglw <2,1,3,5>, <2,2,2,2>
+ 2756945768U, // <3,2,5,3>: Cost 3 vsldoi12 LHS, <2,5,3,6>
+ 3705113910U, // <3,2,5,4>: Cost 4 vsldoi4 <2,3,2,5>, RHS
+ 3310061416U, // <3,2,5,5>: Cost 4 vmrghw <3,5,6,6>, <2,5,3,6>
+ 3310135226U, // <3,2,5,6>: Cost 4 vmrghw <3,5,7,6>, <2,6,3,7>
+ 3370305457U, // <3,2,5,7>: Cost 5 vmrglw <2,4,3,5>, <2,6,2,7>
+ 2752743317U, // <3,2,5,u>: Cost 3 vsldoi12 LHS, <2,5,u,6>
+ 2631376998U, // <3,2,6,0>: Cost 3 vsldoi4 <2,3,2,6>, LHS
+ 3705119540U, // <3,2,6,1>: Cost 4 vsldoi4 <2,3,2,6>, <1,1,1,1>
+ 2631378621U, // <3,2,6,2>: Cost 3 vsldoi4 <2,3,2,6>, <2,3,2,6>
+ 1678559162U, // <3,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2631380278U, // <3,2,6,4>: Cost 3 vsldoi4 <2,3,2,6>, RHS
+ 3370976956U, // <3,2,6,5>: Cost 4 vmrglw <2,5,3,6>, <2,3,2,5>
+ 2237065146U, // <3,2,6,6>: Cost 3 vmrghw <3,6,7,7>, <2,6,3,7>
+ 3798815594U, // <3,2,6,7>: Cost 4 vsldoi8 <6,7,3,2>, <6,7,3,2>
+ 1679001575U, // <3,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 2800076778U, // <3,2,7,0>: Cost 3 vsldoi12 LHS, <2,7,0,1>
+ 3371647724U, // <3,2,7,1>: Cost 4 vmrglw <2,6,3,7>, <1,0,2,1>
+ 2297906792U, // <3,2,7,2>: Cost 3 vmrglw <2,6,3,7>, <2,2,2,2>
+ 1224163430U, // <3,2,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 3705130294U, // <3,2,7,4>: Cost 4 vsldoi4 <2,3,2,7>, RHS
+ 3371648052U, // <3,2,7,5>: Cost 4 vmrglw <2,6,3,7>, <1,4,2,5>
+ 2297906877U, // <3,2,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,3,2,6>
+ 3371648702U, // <3,2,7,7>: Cost 4 vmrglw <2,6,3,7>, <2,3,2,7>
+ 1224163435U, // <3,2,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1679001659U, // <3,2,u,0>: Cost 2 vsldoi12 LHS, <2,u,0,1>
+ 2752743492U, // <3,2,u,1>: Cost 3 vsldoi12 LHS, <2,u,1,1>
+ 1678558824U, // <3,2,u,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678559320U, // <3,2,u,3>: Cost 2 vsldoi12 LHS, <2,u,3,3>
+ 1679001699U, // <3,2,u,4>: Cost 2 vsldoi12 LHS, <2,u,4,5>
+ 2691897498U, // <3,2,u,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2237908922U, // <3,2,u,6>: Cost 3 vmrghw <3,u,1,2>, <2,6,3,7>
+ 2800519289U, // <3,2,u,7>: Cost 3 vsldoi12 LHS, <2,u,7,0>
+ 1679001731U, // <3,2,u,u>: Cost 2 vsldoi12 LHS, <2,u,u,1>
+ 1215480726U, // <3,3,0,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1678559382U, // <3,3,0,1>: Cost 2 vsldoi12 LHS, <3,0,1,2>
+ 2631403200U, // <3,3,0,2>: Cost 3 vsldoi4 <2,3,3,0>, <2,3,3,0>
+ 2289223282U, // <3,3,0,3>: Cost 3 vmrglw <1,2,3,0>, <2,2,3,3>
+ 2752301232U, // <3,3,0,4>: Cost 3 vsldoi12 LHS, <3,0,4,1>
+ 3362965027U, // <3,3,0,5>: Cost 4 vmrglw <1,2,3,0>, <2,1,3,5>
+ 3362965352U, // <3,3,0,6>: Cost 4 vmrglw <1,2,3,0>, <2,5,3,6>
+ 2289223610U, // <3,3,0,7>: Cost 3 vmrglw <1,2,3,0>, <2,6,3,7>
+ 1678559445U, // <3,3,0,u>: Cost 2 vsldoi12 LHS, <3,0,u,2>
+ 3830687964U, // <3,3,1,0>: Cost 4 vsldoi12 LHS, <3,1,0,0>
+ 2752301286U, // <3,3,1,1>: Cost 3 vsldoi12 LHS, <3,1,1,1>
+ 2752301297U, // <3,3,1,2>: Cost 3 vsldoi12 LHS, <3,1,2,3>
+ 2305157532U, // <3,3,1,3>: Cost 3 vmrglw <3,u,3,1>, <3,3,3,3>
+ 3830688000U, // <3,3,1,4>: Cost 4 vsldoi12 LHS, <3,1,4,0>
+ 3830688009U, // <3,3,1,5>: Cost 4 vsldoi12 LHS, <3,1,5,0>
+ 3830688019U, // <3,3,1,6>: Cost 4 vsldoi12 LHS, <3,1,6,1>
+ 3362973626U, // <3,3,1,7>: Cost 4 vmrglw <1,2,3,1>, <2,6,3,7>
+ 2752743719U, // <3,3,1,u>: Cost 3 vsldoi12 LHS, <3,1,u,3>
+ 2631417958U, // <3,3,2,0>: Cost 3 vsldoi4 <2,3,3,2>, LHS
+ 3826043193U, // <3,3,2,1>: Cost 4 vsldoi12 LHS, <3,2,1,3>
+ 1624131186U, // <3,3,2,2>: Cost 2 vsldoi8 <2,2,3,3>, <2,2,3,3>
+ 2752301384U, // <3,3,2,3>: Cost 3 vsldoi12 LHS, <3,2,3,0>
+ 2631421238U, // <3,3,2,4>: Cost 3 vsldoi4 <2,3,3,2>, RHS
+ 3826485602U, // <3,3,2,5>: Cost 4 vsldoi12 LHS, <3,2,5,u>
+ 2752301414U, // <3,3,2,6>: Cost 3 vsldoi12 LHS, <3,2,6,3>
+ 2771249519U, // <3,3,2,7>: Cost 3 vsldoi12 <3,2,7,3>, <3,2,7,3>
+ 1628112984U, // <3,3,2,u>: Cost 2 vsldoi8 <2,u,3,3>, <2,u,3,3>
+ 1563656294U, // <3,3,3,0>: Cost 2 vsldoi4 <3,3,3,3>, LHS
+ 2301855911U, // <3,3,3,1>: Cost 3 vmrglw <3,3,3,3>, <3,0,3,1>
+ 2697873730U, // <3,3,3,2>: Cost 3 vsldoi8 <2,2,3,3>, <3,2,2,3>
+ 403488870U, // <3,3,3,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,3,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 2301856239U, // <3,3,3,5>: Cost 3 vmrglw <3,3,3,3>, <3,4,3,5>
+ 2697874067U, // <3,3,3,6>: Cost 3 vsldoi8 <2,2,3,3>, <3,6,3,7>
+ 2295220154U, // <3,3,3,7>: Cost 3 vmrglw <2,2,3,3>, <2,6,3,7>
+ 403488870U, // <3,3,3,u>: Cost 1 vspltisw3 LHS
+ 2289255318U, // <3,3,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,3,0>
+ 2631435162U, // <3,3,4,1>: Cost 3 vsldoi4 <2,3,3,4>, <1,2,3,4>
+ 2631435972U, // <3,3,4,2>: Cost 3 vsldoi4 <2,3,3,4>, <2,3,3,4>
+ 2289256050U, // <3,3,4,3>: Cost 3 vmrglw <1,2,3,4>, <2,2,3,3>
+ 1215513498U, // <3,3,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679002114U, // <3,3,4,5>: Cost 2 vsldoi12 LHS, <3,4,5,6>
+ 3362998120U, // <3,3,4,6>: Cost 4 vmrglw <1,2,3,4>, <2,5,3,6>
+ 2289256378U, // <3,3,4,7>: Cost 3 vmrglw <1,2,3,4>, <2,6,3,7>
+ 1679002141U, // <3,3,4,u>: Cost 2 vsldoi12 LHS, <3,4,u,6>
+ 3831130657U, // <3,3,5,0>: Cost 4 vsldoi12 LHS, <3,5,0,1>
+ 3376277671U, // <3,3,5,1>: Cost 4 vmrglw <3,4,3,5>, <3,0,3,1>
+ 3771617012U, // <3,3,5,2>: Cost 4 vsldoi8 <2,2,3,3>, <5,2,2,3>
+ 2302536092U, // <3,3,5,3>: Cost 3 vmrglw <3,4,3,5>, <3,3,3,3>
+ 3831130697U, // <3,3,5,4>: Cost 4 vsldoi12 LHS, <3,5,4,5>
+ 2294572579U, // <3,3,5,5>: Cost 3 vmrglw <2,1,3,5>, <2,1,3,5>
+ 2800519773U, // <3,3,5,6>: Cost 3 vsldoi12 LHS, <3,5,6,7>
+ 3368314810U, // <3,3,5,7>: Cost 4 vmrglw <2,1,3,5>, <2,6,3,7>
+ 2800519791U, // <3,3,5,u>: Cost 3 vsldoi12 LHS, <3,5,u,7>
+ 2800077432U, // <3,3,6,0>: Cost 3 vsldoi12 LHS, <3,6,0,7>
+ 3310291185U, // <3,3,6,1>: Cost 4 vmrghw <3,6,0,7>, <3,1,2,3>
+ 2789165706U, // <3,3,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <3,6,2,7>
+ 2764982931U, // <3,3,6,3>: Cost 3 vsldoi12 <2,2,3,3>, <3,6,3,7>
+ 2800077468U, // <3,3,6,4>: Cost 3 vsldoi12 LHS, <3,6,4,7>
+ 3873819301U, // <3,3,6,5>: Cost 4 vsldoi12 LHS, <3,6,5,7>
+ 2297235304U, // <3,3,6,6>: Cost 3 vmrglw <2,5,3,6>, <2,5,3,6>
+ 2725081963U, // <3,3,6,7>: Cost 3 vsldoi8 <6,7,3,3>, <6,7,3,3>
+ 2725745596U, // <3,3,6,u>: Cost 3 vsldoi8 <6,u,3,3>, <6,u,3,3>
+ 2631458918U, // <3,3,7,0>: Cost 3 vsldoi4 <2,3,3,7>, LHS
+ 3705201460U, // <3,3,7,1>: Cost 4 vsldoi4 <2,3,3,7>, <1,1,1,1>
+ 2631460551U, // <3,3,7,2>: Cost 3 vsldoi4 <2,3,3,7>, <2,3,3,7>
+ 2297906802U, // <3,3,7,3>: Cost 3 vmrglw <2,6,3,7>, <2,2,3,3>
+ 2631462198U, // <3,3,7,4>: Cost 3 vsldoi4 <2,3,3,7>, RHS
+ 3371648547U, // <3,3,7,5>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,5>
+ 3371648548U, // <3,3,7,6>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,6>
+ 1224165306U, // <3,3,7,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1224165306U, // <3,3,7,u>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1215480726U, // <3,3,u,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1679002398U, // <3,3,u,1>: Cost 2 vsldoi12 LHS, <3,u,1,2>
+ 1659967368U, // <3,3,u,2>: Cost 2 vsldoi8 <u,2,3,3>, <u,2,3,3>
+ 403488870U, // <3,3,u,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,u,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 1679002438U, // <3,3,u,5>: Cost 2 vsldoi12 LHS, <3,u,5,6>
+ 2756946764U, // <3,3,u,6>: Cost 3 vsldoi12 LHS, <3,u,6,3>
+ 1224165306U, // <3,3,u,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 403488870U, // <3,3,u,u>: Cost 1 vspltisw3 LHS
+ 2691907584U, // <3,4,0,0>: Cost 3 vsldoi8 <1,2,3,4>, <0,0,0,0>
+ 1618165862U, // <3,4,0,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631476937U, // <3,4,0,2>: Cost 3 vsldoi4 <2,3,4,0>, <2,3,4,0>
+ 2232601732U, // <3,4,0,3>: Cost 3 vmrghw <3,0,1,2>, <4,3,5,0>
+ 2691907922U, // <3,4,0,4>: Cost 3 vsldoi8 <1,2,3,4>, <0,4,1,5>
+ 1158860086U, // <3,4,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 3306343806U, // <3,4,0,6>: Cost 4 vmrghw <3,0,1,2>, <4,6,5,7>
+ 3366947484U, // <3,4,0,7>: Cost 4 vmrglw <1,u,3,0>, <3,6,4,7>
+ 1618166429U, // <3,4,0,u>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631483494U, // <3,4,1,0>: Cost 3 vsldoi4 <2,3,4,1>, LHS
+ 2691908404U, // <3,4,1,1>: Cost 3 vsldoi8 <1,2,3,4>, <1,1,1,1>
+ 1618166682U, // <3,4,1,2>: Cost 2 vsldoi8 <1,2,3,4>, <1,2,3,4>
+ 3765650393U, // <3,4,1,3>: Cost 4 vsldoi8 <1,2,3,4>, <1,3,1,4>
+ 2631486774U, // <3,4,1,4>: Cost 3 vsldoi4 <2,3,4,1>, RHS
+ 2756946914U, // <3,4,1,5>: Cost 3 vsldoi12 LHS, <4,1,5,0>
+ 3765650639U, // <3,4,1,6>: Cost 4 vsldoi8 <1,2,3,4>, <1,6,1,7>
+ 3735090439U, // <3,4,1,7>: Cost 4 vsldoi4 <7,3,4,1>, <7,3,4,1>
+ 1622148480U, // <3,4,1,u>: Cost 2 vsldoi8 <1,u,3,4>, <1,u,3,4>
+ 3765650893U, // <3,4,2,0>: Cost 4 vsldoi8 <1,2,3,4>, <2,0,3,0>
+ 3831131154U, // <3,4,2,1>: Cost 4 vsldoi12 LHS, <4,2,1,3>
+ 2691909224U, // <3,4,2,2>: Cost 3 vsldoi8 <1,2,3,4>, <2,2,2,2>
+ 2691909286U, // <3,4,2,3>: Cost 3 vsldoi8 <1,2,3,4>, <2,3,0,1>
+ 2699208469U, // <3,4,2,4>: Cost 3 vsldoi8 <2,4,3,4>, <2,4,3,4>
+ 2233863478U, // <3,4,2,5>: Cost 3 vmrghw <3,2,0,3>, RHS
+ 2691909562U, // <3,4,2,6>: Cost 3 vsldoi8 <1,2,3,4>, <2,6,3,7>
+ 2701199368U, // <3,4,2,7>: Cost 3 vsldoi8 <2,7,3,4>, <2,7,3,4>
+ 2691909691U, // <3,4,2,u>: Cost 3 vsldoi8 <1,2,3,4>, <2,u,0,1>
+ 2691909782U, // <3,4,3,0>: Cost 3 vsldoi8 <1,2,3,4>, <3,0,1,2>
+ 3765651686U, // <3,4,3,1>: Cost 4 vsldoi8 <1,2,3,4>, <3,1,1,1>
+ 2691909972U, // <3,4,3,2>: Cost 3 vsldoi8 <1,2,3,4>, <3,2,4,3>
+ 2691910044U, // <3,4,3,3>: Cost 3 vsldoi8 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <3,4,3,4>: Cost 3 vsldoi8 <1,2,3,4>, <3,4,0,1>
+ 1161006390U, // <3,4,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691910300U, // <3,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 3368962716U, // <3,4,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,4,7>
+ 1161006633U, // <3,4,3,u>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2631508070U, // <3,4,4,0>: Cost 3 vsldoi4 <2,3,4,4>, LHS
+ 2631508890U, // <3,4,4,1>: Cost 3 vsldoi4 <2,3,4,4>, <1,2,3,4>
+ 2631509709U, // <3,4,4,2>: Cost 3 vsldoi4 <2,3,4,4>, <2,3,4,4>
+ 2289256788U, // <3,4,4,3>: Cost 3 vmrglw <1,2,3,4>, <3,2,4,3>
+ 1726336208U, // <3,4,4,4>: Cost 2 vsldoi12 LHS, <4,4,4,4>
+ 1618169142U, // <3,4,4,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 3362998858U, // <3,4,4,6>: Cost 4 vmrglw <1,2,3,4>, <3,5,4,6>
+ 2289257116U, // <3,4,4,7>: Cost 3 vmrglw <1,2,3,4>, <3,6,4,7>
+ 1618169385U, // <3,4,4,u>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 1557774438U, // <3,4,5,0>: Cost 2 vsldoi4 <2,3,4,5>, LHS
+ 2631516980U, // <3,4,5,1>: Cost 3 vsldoi4 <2,3,4,5>, <1,1,1,1>
+ 1557776078U, // <3,4,5,2>: Cost 2 vsldoi4 <2,3,4,5>, <2,3,4,5>
+ 2631518358U, // <3,4,5,3>: Cost 3 vsldoi4 <2,3,4,5>, <3,0,1,2>
+ 1557777718U, // <3,4,5,4>: Cost 2 vsldoi4 <2,3,4,5>, RHS
+ 2296563406U, // <3,4,5,5>: Cost 3 vmrglw <2,4,3,5>, <2,3,4,5>
+ 604818742U, // <3,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661381387U, // <3,4,5,7>: Cost 3 vsldoi4 <7,3,4,5>, <7,3,4,5>
+ 604818760U, // <3,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 3705266278U, // <3,4,6,0>: Cost 4 vsldoi4 <2,3,4,6>, LHS
+ 3831131482U, // <3,4,6,1>: Cost 4 vsldoi12 LHS, <4,6,1,7>
+ 2733715962U, // <3,4,6,2>: Cost 3 vsldoi8 <u,2,3,4>, <6,2,7,3>
+ 3844771180U, // <3,4,6,3>: Cost 4 vsldoi12 <3,2,4,3>, <4,6,3,7>
+ 2800078197U, // <3,4,6,4>: Cost 3 vsldoi12 LHS, <4,6,4,7>
+ 2236550454U, // <3,4,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716280U, // <3,4,6,6>: Cost 3 vsldoi8 <u,2,3,4>, <6,6,6,6>
+ 2725090156U, // <3,4,6,7>: Cost 3 vsldoi8 <6,7,3,4>, <6,7,3,4>
+ 2236550697U, // <3,4,6,u>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716474U, // <3,4,7,0>: Cost 3 vsldoi8 <u,2,3,4>, <7,0,1,2>
+ 3371647013U, // <3,4,7,1>: Cost 4 vmrglw <2,6,3,7>, <0,0,4,1>
+ 2727744688U, // <3,4,7,2>: Cost 3 vsldoi8 <7,2,3,4>, <7,2,3,4>
+ 3371649364U, // <3,4,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,4,3>
+ 2733716838U, // <3,4,7,4>: Cost 3 vsldoi8 <u,2,3,4>, <7,4,5,6>
+ 2297906894U, // <3,4,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,5>
+ 3371647180U, // <3,4,7,6>: Cost 4 vmrglw <2,6,3,7>, <0,2,4,6>
+ 2733717100U, // <3,4,7,7>: Cost 3 vsldoi8 <u,2,3,4>, <7,7,7,7>
+ 2297906897U, // <3,4,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,u>
+ 1557799014U, // <3,4,u,0>: Cost 2 vsldoi4 <2,3,4,u>, LHS
+ 1618171694U, // <3,4,u,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 1557800657U, // <3,4,u,2>: Cost 2 vsldoi4 <2,3,4,u>, <2,3,4,u>
+ 2691913660U, // <3,4,u,3>: Cost 3 vsldoi8 <1,2,3,4>, <u,3,0,1>
+ 1557802294U, // <3,4,u,4>: Cost 2 vsldoi4 <2,3,4,u>, RHS
+ 1618172058U, // <3,4,u,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 604818985U, // <3,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661405966U, // <3,4,u,7>: Cost 3 vsldoi4 <7,3,4,u>, <7,3,4,u>
+ 604819003U, // <3,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2643492966U, // <3,5,0,0>: Cost 3 vsldoi4 <4,3,5,0>, LHS
+ 2756947528U, // <3,5,0,1>: Cost 3 vsldoi12 LHS, <5,0,1,2>
+ 2331029019U, // <3,5,0,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2643495062U, // <3,5,0,3>: Cost 3 vsldoi4 <4,3,5,0>, <3,0,1,2>
+ 2756947554U, // <3,5,0,4>: Cost 3 vsldoi12 LHS, <5,0,4,1>
+ 2800078443U, // <3,5,0,5>: Cost 3 vsldoi12 LHS, <5,0,5,1>
+ 2289224194U, // <3,5,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,5,6>
+ 3362964723U, // <3,5,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,5,7>
+ 2756947590U, // <3,5,0,u>: Cost 3 vsldoi12 LHS, <5,0,u,1>
+ 2800078479U, // <3,5,1,0>: Cost 3 vsldoi12 LHS, <5,1,0,1>
+ 2333027218U, // <3,5,1,1>: Cost 3 vmrglw <u,5,3,1>, <4,0,5,1>
+ 2691916699U, // <3,5,1,2>: Cost 3 vsldoi8 <1,2,3,5>, <1,2,3,5>
+ 3832901294U, // <3,5,1,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,1,3,5>
+ 2800078519U, // <3,5,1,4>: Cost 3 vsldoi12 LHS, <5,1,4,5>
+ 3830689467U, // <3,5,1,5>: Cost 4 vsldoi12 LHS, <5,1,5,0>
+ 3830689481U, // <3,5,1,6>: Cost 4 vsldoi12 LHS, <5,1,6,5>
+ 3873820365U, // <3,5,1,7>: Cost 4 vsldoi12 LHS, <5,1,7,0>
+ 2800078551U, // <3,5,1,u>: Cost 3 vsldoi12 LHS, <5,1,u,1>
+ 3770967487U, // <3,5,2,0>: Cost 4 vsldoi8 <2,1,3,5>, <2,0,1,4>
+ 2697225763U, // <3,5,2,1>: Cost 3 vsldoi8 <2,1,3,5>, <2,1,3,5>
+ 3830689523U, // <3,5,2,2>: Cost 4 vsldoi12 LHS, <5,2,2,2>
+ 2699216590U, // <3,5,2,3>: Cost 3 vsldoi8 <2,4,3,5>, <2,3,4,5>
+ 2699216662U, // <3,5,2,4>: Cost 3 vsldoi8 <2,4,3,5>, <2,4,3,5>
+ 2783047439U, // <3,5,2,5>: Cost 3 vsldoi12 <5,2,5,3>, <5,2,5,3>
+ 2783121176U, // <3,5,2,6>: Cost 3 vsldoi12 <5,2,6,3>, <5,2,6,3>
+ 3856936737U, // <3,5,2,7>: Cost 4 vsldoi12 <5,2,7,3>, <5,2,7,3>
+ 2701871194U, // <3,5,2,u>: Cost 3 vsldoi8 <2,u,3,5>, <2,u,3,5>
+ 2643517542U, // <3,5,3,0>: Cost 3 vsldoi4 <4,3,5,3>, LHS
+ 2331052946U, // <3,5,3,1>: Cost 3 vmrglw <u,2,3,3>, <4,0,5,1>
+ 3699345010U, // <3,5,3,2>: Cost 4 vsldoi4 <1,3,5,3>, <2,2,3,3>
+ 2705189276U, // <3,5,3,3>: Cost 3 vsldoi8 <3,4,3,5>, <3,3,3,3>
+ 2705189359U, // <3,5,3,4>: Cost 3 vsldoi8 <3,4,3,5>, <3,4,3,5>
+ 2331053274U, // <3,5,3,5>: Cost 3 vmrglw <u,2,3,3>, <4,4,5,5>
+ 2295220738U, // <3,5,3,6>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,6>
+ 3368961267U, // <3,5,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,5,7>
+ 2295220740U, // <3,5,3,u>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,u>
+ 2643525734U, // <3,5,4,0>: Cost 3 vsldoi4 <4,3,5,4>, LHS
+ 2331061138U, // <3,5,4,1>: Cost 3 vmrglw <u,2,3,4>, <4,0,5,1>
+ 2235584280U, // <3,5,4,2>: Cost 3 vmrghw <3,4,5,6>, <5,2,6,3>
+ 2643528194U, // <3,5,4,3>: Cost 3 vsldoi4 <4,3,5,4>, <3,4,5,6>
+ 2735713498U, // <3,5,4,4>: Cost 3 vsldoi8 <u,5,3,5>, <4,4,5,5>
+ 2756947892U, // <3,5,4,5>: Cost 3 vsldoi12 LHS, <5,4,5,6>
+ 2289256962U, // <3,5,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,5,6>
+ 3362997491U, // <3,5,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,5,7>
+ 2756947919U, // <3,5,4,u>: Cost 3 vsldoi12 LHS, <5,4,u,6>
+ 2800078803U, // <3,5,5,0>: Cost 3 vsldoi12 LHS, <5,5,0,1>
+ 2800078812U, // <3,5,5,1>: Cost 3 vsldoi12 LHS, <5,5,1,1>
+ 2631591639U, // <3,5,5,2>: Cost 3 vsldoi4 <2,3,5,5>, <2,3,5,5>
+ 3832901616U, // <3,5,5,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,5,3,3>
+ 2800078843U, // <3,5,5,4>: Cost 3 vsldoi12 LHS, <5,5,4,5>
+ 1726337028U, // <3,5,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078862U, // <3,5,5,6>: Cost 3 vsldoi12 LHS, <5,5,6,6>
+ 3368314099U, // <3,5,5,7>: Cost 4 vmrglw <2,1,3,5>, <1,6,5,7>
+ 1726337028U, // <3,5,5,u>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078884U, // <3,5,6,0>: Cost 3 vsldoi12 LHS, <5,6,0,1>
+ 2800078899U, // <3,5,6,1>: Cost 3 vsldoi12 LHS, <5,6,1,7>
+ 2631599832U, // <3,5,6,2>: Cost 3 vsldoi4 <2,3,5,6>, <2,3,5,6>
+ 2800078914U, // <3,5,6,3>: Cost 3 vsldoi12 LHS, <5,6,3,4>
+ 2800078924U, // <3,5,6,4>: Cost 3 vsldoi12 LHS, <5,6,4,5>
+ 2800078935U, // <3,5,6,5>: Cost 3 vsldoi12 LHS, <5,6,5,7>
+ 2297235970U, // <3,5,6,6>: Cost 3 vmrglw <2,5,3,6>, <3,4,5,6>
+ 1726337122U, // <3,5,6,7>: Cost 2 vsldoi12 LHS, <5,6,7,0>
+ 1726337131U, // <3,5,6,u>: Cost 2 vsldoi12 LHS, <5,6,u,0>
+ 3699376230U, // <3,5,7,0>: Cost 4 vsldoi4 <1,3,5,7>, LHS
+ 2333739922U, // <3,5,7,1>: Cost 3 vmrglw <u,6,3,7>, <4,0,5,1>
+ 3699378106U, // <3,5,7,2>: Cost 4 vsldoi4 <1,3,5,7>, <2,6,3,7>
+ 3371647915U, // <3,5,7,3>: Cost 4 vmrglw <2,6,3,7>, <1,2,5,3>
+ 3699379510U, // <3,5,7,4>: Cost 4 vsldoi4 <1,3,5,7>, RHS
+ 2333740250U, // <3,5,7,5>: Cost 3 vmrglw <u,6,3,7>, <4,4,5,5>
+ 2297907714U, // <3,5,7,6>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,6>
+ 3370984691U, // <3,5,7,7>: Cost 4 vmrglw <2,5,3,7>, <1,6,5,7>
+ 2297907716U, // <3,5,7,u>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,u>
+ 2800079046U, // <3,5,u,0>: Cost 3 vsldoi12 LHS, <5,u,0,1>
+ 2756948176U, // <3,5,u,1>: Cost 3 vsldoi12 LHS, <5,u,1,2>
+ 2331029019U, // <3,5,u,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2800079076U, // <3,5,u,3>: Cost 3 vsldoi12 LHS, <5,u,3,4>
+ 2800079085U, // <3,5,u,4>: Cost 3 vsldoi12 LHS, <5,u,4,4>
+ 1726337028U, // <3,5,u,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2289289730U, // <3,5,u,6>: Cost 3 vmrglw <1,2,3,u>, <3,4,5,6>
+ 1726337284U, // <3,5,u,7>: Cost 2 vsldoi12 LHS, <5,u,7,0>
+ 1726337293U, // <3,5,u,u>: Cost 2 vsldoi12 LHS, <5,u,u,0>
+ 3773628416U, // <3,6,0,0>: Cost 4 vsldoi8 <2,5,3,6>, <0,0,0,0>
+ 2699886694U, // <3,6,0,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789167401U, // <3,6,0,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,0,2,1>
+ 3362965862U, // <3,6,0,3>: Cost 4 vmrglw <1,2,3,0>, <3,2,6,3>
+ 3773628754U, // <3,6,0,4>: Cost 4 vsldoi8 <2,5,3,6>, <0,4,1,5>
+ 3723284326U, // <3,6,0,5>: Cost 4 vsldoi4 <5,3,6,0>, <5,3,6,0>
+ 2800079181U, // <3,6,0,6>: Cost 3 vsldoi12 LHS, <6,0,6,1>
+ 1215483190U, // <3,6,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1215483191U, // <3,6,0,u>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 3873821032U, // <3,6,1,0>: Cost 4 vsldoi12 LHS, <6,1,0,1>
+ 3773629236U, // <3,6,1,1>: Cost 4 vsldoi8 <2,5,3,6>, <1,1,1,1>
+ 2691924892U, // <3,6,1,2>: Cost 3 vsldoi8 <1,2,3,6>, <1,2,3,6>
+ 3830690184U, // <3,6,1,3>: Cost 5 vsldoi12 LHS, <6,1,3,6>
+ 3873821072U, // <3,6,1,4>: Cost 4 vsldoi12 LHS, <6,1,4,5>
+ 3873821082U, // <3,6,1,5>: Cost 4 vsldoi12 LHS, <6,1,5,6>
+ 3403453240U, // <3,6,1,6>: Cost 4 vmrglw <u,0,3,1>, <6,6,6,6>
+ 2289233206U, // <3,6,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2289233207U, // <3,6,1,u>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2661498982U, // <3,6,2,0>: Cost 3 vsldoi4 <7,3,6,2>, LHS
+ 3770975780U, // <3,6,2,1>: Cost 4 vsldoi8 <2,1,3,6>, <2,1,3,6>
+ 2631640797U, // <3,6,2,2>: Cost 3 vsldoi4 <2,3,6,2>, <2,3,6,2>
+ 3771639485U, // <3,6,2,3>: Cost 4 vsldoi8 <2,2,3,6>, <2,3,2,6>
+ 2661502262U, // <3,6,2,4>: Cost 3 vsldoi4 <7,3,6,2>, RHS
+ 2699888488U, // <3,6,2,5>: Cost 3 vsldoi8 <2,5,3,6>, <2,5,3,6>
+ 2661503482U, // <3,6,2,6>: Cost 3 vsldoi4 <7,3,6,2>, <6,2,7,3>
+ 1715425786U, // <3,6,2,7>: Cost 2 vsldoi12 <6,2,7,3>, <6,2,7,3>
+ 1715499523U, // <3,6,2,u>: Cost 2 vsldoi12 <6,2,u,3>, <6,2,u,3>
+ 3773630614U, // <3,6,3,0>: Cost 4 vsldoi8 <2,5,3,6>, <3,0,1,2>
+ 3372942825U, // <3,6,3,1>: Cost 4 vmrglw <2,u,3,3>, <2,0,6,1>
+ 2234749434U, // <3,6,3,2>: Cost 3 vmrghw <3,3,3,3>, <6,2,7,3>
+ 3368962406U, // <3,6,3,3>: Cost 4 vmrglw <2,2,3,3>, <3,2,6,3>
+ 2699889154U, // <3,6,3,4>: Cost 3 vsldoi8 <2,5,3,6>, <3,4,5,6>
+ 3773631068U, // <3,6,3,5>: Cost 4 vsldoi8 <2,5,3,6>, <3,5,6,6>
+ 2331054904U, // <3,6,3,6>: Cost 3 vmrglw <u,2,3,3>, <6,6,6,6>
+ 1221479734U, // <3,6,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 1221479735U, // <3,6,3,u>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 2235584801U, // <3,6,4,0>: Cost 3 vmrghw <3,4,5,6>, <6,0,1,2>
+ 3717342106U, // <3,6,4,1>: Cost 4 vsldoi4 <4,3,6,4>, <1,2,3,4>
+ 2789167729U, // <3,6,4,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,4,2,5>
+ 2235585074U, // <3,6,4,3>: Cost 3 vmrghw <3,4,5,6>, <6,3,4,5>
+ 2235585165U, // <3,6,4,4>: Cost 3 vmrghw <3,4,5,6>, <6,4,5,6>
+ 2699889974U, // <3,6,4,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 2800079509U, // <3,6,4,6>: Cost 3 vsldoi12 LHS, <6,4,6,5>
+ 1215515958U, // <3,6,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1215515959U, // <3,6,4,u>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 3873821356U, // <3,6,5,0>: Cost 4 vsldoi12 LHS, <6,5,0,1>
+ 3372959209U, // <3,6,5,1>: Cost 5 vmrglw <2,u,3,5>, <2,0,6,1>
+ 3862909629U, // <3,6,5,2>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,2,0>
+ 3773632358U, // <3,6,5,3>: Cost 4 vsldoi8 <2,5,3,6>, <5,3,6,0>
+ 3873821396U, // <3,6,5,4>: Cost 4 vsldoi12 LHS, <6,5,4,5>
+ 3873821405U, // <3,6,5,5>: Cost 4 vsldoi12 LHS, <6,5,5,5>
+ 3862909672U, // <3,6,5,6>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,6,7>
+ 2294574390U, // <3,6,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2294574391U, // <3,6,5,u>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2800079613U, // <3,6,6,0>: Cost 3 vsldoi12 LHS, <6,6,0,1>
+ 3873821446U, // <3,6,6,1>: Cost 4 vsldoi12 LHS, <6,6,1,1>
+ 2789167888U, // <3,6,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,6,2,2>
+ 3844920090U, // <3,6,6,3>: Cost 4 vsldoi12 <3,2,6,3>, <6,6,3,3>
+ 2800079653U, // <3,6,6,4>: Cost 3 vsldoi12 LHS, <6,6,4,5>
+ 3723333484U, // <3,6,6,5>: Cost 4 vsldoi4 <5,3,6,6>, <5,3,6,6>
+ 1726337848U, // <3,6,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726337858U, // <3,6,6,7>: Cost 2 vsldoi12 LHS, <6,6,7,7>
+ 1726337867U, // <3,6,6,u>: Cost 2 vsldoi12 LHS, <6,6,u,7>
+ 1726337870U, // <3,6,7,0>: Cost 2 vsldoi12 LHS, <6,7,0,1>
+ 2297906665U, // <3,6,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,0,6,1>
+ 2792117090U, // <3,6,7,2>: Cost 3 vsldoi12 <6,7,2,3>, <6,7,2,3>
+ 2297907558U, // <3,6,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,6,3>
+ 1726337910U, // <3,6,7,4>: Cost 2 vsldoi12 LHS, <6,7,4,5>
+ 2297906993U, // <3,6,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,4,6,5>
+ 2297906832U, // <3,6,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,2,6,6>
+ 1224166710U, // <3,6,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224166711U, // <3,6,7,u>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1726337951U, // <3,6,u,0>: Cost 2 vsldoi12 LHS, <6,u,0,1>
+ 2699892526U, // <3,6,u,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789168049U, // <3,6,u,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,u,2,1>
+ 2792854460U, // <3,6,u,3>: Cost 3 vsldoi12 <6,u,3,3>, <6,u,3,3>
+ 1726337991U, // <3,6,u,4>: Cost 2 vsldoi12 LHS, <6,u,4,5>
+ 2699892890U, // <3,6,u,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 1726337848U, // <3,6,u,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1215548726U, // <3,6,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 1215548727U, // <3,6,u,u>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 2700558336U, // <3,7,0,0>: Cost 3 vsldoi8 <2,6,3,7>, <0,0,0,0>
+ 1626816614U, // <3,7,0,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700558513U, // <3,7,0,2>: Cost 3 vsldoi8 <2,6,3,7>, <0,2,1,6>
+ 2331030010U, // <3,7,0,3>: Cost 3 vmrglw <u,2,3,0>, <6,2,7,3>
+ 2700558674U, // <3,7,0,4>: Cost 3 vsldoi8 <2,6,3,7>, <0,4,1,5>
+ 2800079906U, // <3,7,0,5>: Cost 3 vsldoi12 LHS, <7,0,5,6>
+ 2655588936U, // <3,7,0,6>: Cost 3 vsldoi4 <6,3,7,0>, <6,3,7,0>
+ 2800079919U, // <3,7,0,7>: Cost 3 vsldoi12 LHS, <7,0,7,1>
+ 1626817181U, // <3,7,0,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 3774300899U, // <3,7,1,0>: Cost 4 vsldoi8 <2,6,3,7>, <1,0,1,1>
+ 2700559156U, // <3,7,1,1>: Cost 3 vsldoi8 <2,6,3,7>, <1,1,1,1>
+ 2700559254U, // <3,7,1,2>: Cost 3 vsldoi8 <2,6,3,7>, <1,2,3,0>
+ 3774301148U, // <3,7,1,3>: Cost 4 vsldoi8 <2,6,3,7>, <1,3,1,7>
+ 3774301227U, // <3,7,1,4>: Cost 4 vsldoi8 <2,6,3,7>, <1,4,1,5>
+ 3774301295U, // <3,7,1,5>: Cost 4 vsldoi8 <2,6,3,7>, <1,5,0,1>
+ 3768329441U, // <3,7,1,6>: Cost 4 vsldoi8 <1,6,3,7>, <1,6,3,7>
+ 3403453250U, // <3,7,1,7>: Cost 4 vmrglw <u,0,3,1>, <6,6,7,7>
+ 2700559740U, // <3,7,1,u>: Cost 3 vsldoi8 <2,6,3,7>, <1,u,3,0>
+ 2700559849U, // <3,7,2,0>: Cost 3 vsldoi8 <2,6,3,7>, <2,0,6,1>
+ 3770983973U, // <3,7,2,1>: Cost 4 vsldoi8 <2,1,3,7>, <2,1,3,7>
+ 2700559976U, // <3,7,2,2>: Cost 3 vsldoi8 <2,6,3,7>, <2,2,2,2>
+ 2698569415U, // <3,7,2,3>: Cost 3 vsldoi8 <2,3,3,7>, <2,3,3,7>
+ 2700560177U, // <3,7,2,4>: Cost 3 vsldoi8 <2,6,3,7>, <2,4,6,5>
+ 3773638505U, // <3,7,2,5>: Cost 4 vsldoi8 <2,5,3,7>, <2,5,3,7>
+ 1626818490U, // <3,7,2,6>: Cost 2 vsldoi8 <2,6,3,7>, <2,6,3,7>
+ 2795140307U, // <3,7,2,7>: Cost 3 vsldoi12 <7,2,7,3>, <7,2,7,3>
+ 1628145756U, // <3,7,2,u>: Cost 2 vsldoi8 <2,u,3,7>, <2,u,3,7>
+ 2700560534U, // <3,7,3,0>: Cost 3 vsldoi8 <2,6,3,7>, <3,0,1,2>
+ 3774302438U, // <3,7,3,1>: Cost 4 vsldoi8 <2,6,3,7>, <3,1,1,1>
+ 2700560742U, // <3,7,3,2>: Cost 3 vsldoi8 <2,6,3,7>, <3,2,6,3>
+ 2700560796U, // <3,7,3,3>: Cost 3 vsldoi8 <2,6,3,7>, <3,3,3,3>
+ 2700560898U, // <3,7,3,4>: Cost 3 vsldoi8 <2,6,3,7>, <3,4,5,6>
+ 3774302821U, // <3,7,3,5>: Cost 4 vsldoi8 <2,6,3,7>, <3,5,7,6>
+ 2700561079U, // <3,7,3,6>: Cost 3 vsldoi8 <2,6,3,7>, <3,6,7,7>
+ 2700561091U, // <3,7,3,7>: Cost 3 vsldoi8 <2,6,3,7>, <3,7,0,1>
+ 2700561182U, // <3,7,3,u>: Cost 3 vsldoi8 <2,6,3,7>, <3,u,1,2>
+ 2655617126U, // <3,7,4,0>: Cost 3 vsldoi4 <6,3,7,4>, LHS
+ 3774303178U, // <3,7,4,1>: Cost 4 vsldoi8 <2,6,3,7>, <4,1,2,3>
+ 2655619002U, // <3,7,4,2>: Cost 3 vsldoi4 <6,3,7,4>, <2,6,3,7>
+ 2331062778U, // <3,7,4,3>: Cost 3 vmrglw <u,2,3,4>, <6,2,7,3>
+ 2655620406U, // <3,7,4,4>: Cost 3 vsldoi4 <6,3,7,4>, RHS
+ 1626819894U, // <3,7,4,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 2655621708U, // <3,7,4,6>: Cost 3 vsldoi4 <6,3,7,4>, <6,3,7,4>
+ 2800080247U, // <3,7,4,7>: Cost 3 vsldoi12 LHS, <7,4,7,5>
+ 1626820137U, // <3,7,4,u>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 3774303816U, // <3,7,5,0>: Cost 4 vsldoi8 <2,6,3,7>, <5,0,1,2>
+ 3873822093U, // <3,7,5,1>: Cost 4 vsldoi12 LHS, <7,5,1,0>
+ 3774303998U, // <3,7,5,2>: Cost 4 vsldoi8 <2,6,3,7>, <5,2,3,4>
+ 3862910368U, // <3,7,5,3>: Cost 4 vsldoi12 <6,2,7,3>, <7,5,3,1>
+ 3774304180U, // <3,7,5,4>: Cost 4 vsldoi8 <2,6,3,7>, <5,4,5,6>
+ 2800080310U, // <3,7,5,5>: Cost 3 vsldoi12 LHS, <7,5,5,5>
+ 2800080321U, // <3,7,5,6>: Cost 3 vsldoi12 LHS, <7,5,6,7>
+ 3873822147U, // <3,7,5,7>: Cost 4 vsldoi12 LHS, <7,5,7,0>
+ 2800080339U, // <3,7,5,u>: Cost 3 vsldoi12 LHS, <7,5,u,7>
+ 2800080348U, // <3,7,6,0>: Cost 3 vsldoi12 LHS, <7,6,0,7>
+ 3873822181U, // <3,7,6,1>: Cost 4 vsldoi12 LHS, <7,6,1,7>
+ 2789168622U, // <3,7,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <7,6,2,7>
+ 2700563016U, // <3,7,6,3>: Cost 3 vsldoi8 <2,6,3,7>, <6,3,7,0>
+ 2800080384U, // <3,7,6,4>: Cost 3 vsldoi12 LHS, <7,6,4,7>
+ 3862910472U, // <3,7,6,5>: Cost 4 vsldoi12 <6,2,7,3>, <7,6,5,6>
+ 2700563256U, // <3,7,6,6>: Cost 3 vsldoi8 <2,6,3,7>, <6,6,6,6>
+ 2800080404U, // <3,7,6,7>: Cost 3 vsldoi12 LHS, <7,6,7,0>
+ 2793149988U, // <3,7,6,u>: Cost 3 vsldoi12 <6,u,7,3>, <7,6,u,7>
+ 2637725798U, // <3,7,7,0>: Cost 3 vsldoi4 <3,3,7,7>, LHS
+ 3371649227U, // <3,7,7,1>: Cost 4 vmrglw <2,6,3,7>, <3,0,7,1>
+ 2637727674U, // <3,7,7,2>: Cost 3 vsldoi4 <3,3,7,7>, <2,6,3,7>
+ 2297907567U, // <3,7,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,7,3>
+ 2637729078U, // <3,7,7,4>: Cost 3 vsldoi4 <3,3,7,7>, RHS
+ 3371649312U, // <3,7,7,5>: Cost 4 vmrglw <2,6,3,7>, <3,1,7,5>
+ 2655646287U, // <3,7,7,6>: Cost 3 vsldoi4 <6,3,7,7>, <6,3,7,7>
+ 1726338668U, // <3,7,7,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1726338668U, // <3,7,7,u>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 2700564179U, // <3,7,u,0>: Cost 3 vsldoi8 <2,6,3,7>, <u,0,1,2>
+ 1626822446U, // <3,7,u,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700564357U, // <3,7,u,2>: Cost 3 vsldoi8 <2,6,3,7>, <u,2,3,0>
+ 2700564412U, // <3,7,u,3>: Cost 3 vsldoi8 <2,6,3,7>, <u,3,0,1>
+ 2700564543U, // <3,7,u,4>: Cost 3 vsldoi8 <2,6,3,7>, <u,4,5,6>
+ 1626822810U, // <3,7,u,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 1662654672U, // <3,7,u,6>: Cost 2 vsldoi8 <u,6,3,7>, <u,6,3,7>
+ 1726338668U, // <3,7,u,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1626823013U, // <3,7,u,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 1678557184U, // <3,u,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1679005395U, // <3,u,0,1>: Cost 2 vsldoi12 LHS, <u,0,1,2>
+ 2289221787U, // <3,u,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,u,2>
+ 1215479964U, // <3,u,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 2752747245U, // <3,u,0,4>: Cost 3 vsldoi12 LHS, <u,0,4,1>
+ 1158863002U, // <3,u,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 2289224221U, // <3,u,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,u,6>
+ 1215483208U, // <3,u,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1679005458U, // <3,u,0,u>: Cost 2 vsldoi12 LHS, <u,0,u,2>
+ 1558036582U, // <3,u,1,0>: Cost 2 vsldoi4 <2,3,u,1>, LHS
+ 1678558004U, // <3,u,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 604821294U, // <3,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2752747317U, // <3,u,1,3>: Cost 3 vsldoi12 LHS, <u,1,3,1>
+ 1558039862U, // <3,u,1,4>: Cost 2 vsldoi4 <2,3,u,1>, RHS
+ 2756949830U, // <3,u,1,5>: Cost 3 vsldoi12 LHS, <u,1,5,0>
+ 2800080726U, // <3,u,1,6>: Cost 3 vsldoi12 LHS, <u,1,6,7>
+ 2289233224U, // <3,u,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 604821348U, // <3,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696586709U, // <3,u,2,0>: Cost 3 vsldoi8 <2,0,3,u>, <2,0,3,u>
+ 2757392246U, // <3,u,2,1>: Cost 3 vsldoi12 LHS, <u,2,1,3>
+ 1624172151U, // <3,u,2,2>: Cost 2 vsldoi8 <2,2,3,u>, <2,2,3,u>
+ 1679005576U, // <3,u,2,3>: Cost 2 vsldoi12 LHS, <u,2,3,3>
+ 2631789878U, // <3,u,2,4>: Cost 3 vsldoi4 <2,3,u,2>, RHS
+ 2699904874U, // <3,u,2,5>: Cost 3 vsldoi8 <2,5,3,u>, <2,5,3,u>
+ 1626826683U, // <3,u,2,6>: Cost 2 vsldoi8 <2,6,3,u>, <2,6,3,u>
+ 1726338988U, // <3,u,2,7>: Cost 2 vsldoi12 LHS, <u,2,7,3>
+ 1683208117U, // <3,u,2,u>: Cost 2 vsldoi12 LHS, <u,2,u,3>
+ 1679005628U, // <3,u,3,0>: Cost 2 vsldoi12 LHS, <u,3,0,1>
+ 1161008942U, // <3,u,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2752747471U, // <3,u,3,2>: Cost 3 vsldoi12 LHS, <u,3,2,2>
+ 403488870U, // <3,u,3,3>: Cost 1 vspltisw3 LHS
+ 1679005668U, // <3,u,3,4>: Cost 2 vsldoi12 LHS, <u,3,4,5>
+ 1161009306U, // <3,u,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691943104U, // <3,u,3,6>: Cost 3 vsldoi8 <1,2,3,u>, <3,6,u,7>
+ 1221479752U, // <3,u,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 403488870U, // <3,u,3,u>: Cost 1 vspltisw3 LHS
+ 2289255363U, // <3,u,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,u,0>
+ 1161844526U, // <3,u,4,1>: Cost 2 vmrghw <3,4,5,6>, LHS
+ 2289256661U, // <3,u,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,u,2>
+ 1215512732U, // <3,u,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 1215513498U, // <3,u,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679005759U, // <3,u,4,5>: Cost 2 vsldoi12 LHS, <u,4,5,6>
+ 2289256989U, // <3,u,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,u,6>
+ 1215515976U, // <3,u,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1679005786U, // <3,u,4,u>: Cost 2 vsldoi12 LHS, <u,4,u,6>
+ 1558069350U, // <3,u,5,0>: Cost 2 vsldoi4 <2,3,u,5>, LHS
+ 2631811892U, // <3,u,5,1>: Cost 3 vsldoi4 <2,3,u,5>, <1,1,1,1>
+ 1558071026U, // <3,u,5,2>: Cost 2 vsldoi4 <2,3,u,5>, <2,3,u,5>
+ 2752747646U, // <3,u,5,3>: Cost 3 vsldoi12 LHS, <u,5,3,6>
+ 1558072630U, // <3,u,5,4>: Cost 2 vsldoi4 <2,3,u,5>, RHS
+ 1726337028U, // <3,u,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 604821658U, // <3,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2294574408U, // <3,u,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 604821676U, // <3,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2631819366U, // <3,u,6,0>: Cost 3 vsldoi4 <2,3,u,6>, LHS
+ 2757392574U, // <3,u,6,1>: Cost 3 vsldoi12 LHS, <u,6,1,7>
+ 2631821043U, // <3,u,6,2>: Cost 3 vsldoi4 <2,3,u,6>, <2,3,u,6>
+ 1679005904U, // <3,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 2631822646U, // <3,u,6,4>: Cost 3 vsldoi4 <2,3,u,6>, RHS
+ 2236553370U, // <3,u,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 1726337848U, // <3,u,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726339309U, // <3,u,6,7>: Cost 2 vsldoi12 LHS, <u,6,7,0>
+ 1683208445U, // <3,u,6,u>: Cost 2 vsldoi12 LHS, <u,6,u,7>
+ 1726339328U, // <3,u,7,0>: Cost 2 vsldoi12 LHS, <u,7,0,1>
+ 2297905225U, // <3,u,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,u,1>
+ 2631829236U, // <3,u,7,2>: Cost 3 vsldoi4 <2,3,u,7>, <2,3,u,7>
+ 1224163484U, // <3,u,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1726339368U, // <3,u,7,4>: Cost 2 vsldoi12 LHS, <u,7,4,5>
+ 2297905553U, // <3,u,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,u,5>
+ 2297905392U, // <3,u,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,u,6>
+ 1224166728U, // <3,u,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224163489U, // <3,u,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1683208529U, // <3,u,u,0>: Cost 2 vsldoi12 LHS, <u,u,0,1>
+ 1679006043U, // <3,u,u,1>: Cost 2 vsldoi12 LHS, <u,u,1,2>
+ 604821861U, // <3,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 403488870U, // <3,u,u,3>: Cost 1 vspltisw3 LHS
+ 1683208569U, // <3,u,u,4>: Cost 2 vsldoi12 LHS, <u,u,4,5>
+ 1679006083U, // <3,u,u,5>: Cost 2 vsldoi12 LHS, <u,u,5,6>
+ 604821901U, // <3,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 1215548744U, // <3,u,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 604821915U, // <3,u,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2759016448U, // <4,0,0,0>: Cost 3 vsldoi12 <1,2,3,4>, <0,0,0,0>
+ 1165115494U, // <4,0,0,1>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 3717531337U, // <4,0,0,2>: Cost 4 vsldoi4 <4,4,0,0>, <2,3,4,0>
+ 3369675785U, // <4,0,0,3>: Cost 4 vmrglw <2,3,4,0>, <4,2,0,3>
+ 2751791144U, // <4,0,0,4>: Cost 3 vsldoi12 <0,0,4,4>, <0,0,4,4>
+ 2238857630U, // <4,0,0,5>: Cost 3 vmrghw <4,0,5,1>, <0,5,1,0>
+ 3312591341U, // <4,0,0,6>: Cost 4 vmrghw <4,0,5,0>, <0,6,0,7>
+ 3369676113U, // <4,0,0,7>: Cost 4 vmrglw <2,3,4,0>, <4,6,0,7>
+ 1165116061U, // <4,0,0,u>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 2637824102U, // <4,0,1,0>: Cost 3 vsldoi4 <3,4,0,1>, LHS
+ 2637824922U, // <4,0,1,1>: Cost 3 vsldoi4 <3,4,0,1>, <1,2,3,4>
+ 1685274726U, // <4,0,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637826512U, // <4,0,1,3>: Cost 3 vsldoi4 <3,4,0,1>, <3,4,0,1>
+ 2637827382U, // <4,0,1,4>: Cost 3 vsldoi4 <3,4,0,1>, RHS
+ 2661716070U, // <4,0,1,5>: Cost 3 vsldoi4 <7,4,0,1>, <5,6,7,4>
+ 3729486427U, // <4,0,1,6>: Cost 4 vsldoi4 <6,4,0,1>, <6,4,0,1>
+ 2661717300U, // <4,0,1,7>: Cost 3 vsldoi4 <7,4,0,1>, <7,4,0,1>
+ 1685274780U, // <4,0,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 3711574118U, // <4,0,2,0>: Cost 4 vsldoi4 <3,4,0,2>, LHS
+ 2240200806U, // <4,0,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 3771663992U, // <4,0,2,2>: Cost 4 vsldoi8 <2,2,4,0>, <2,2,4,0>
+ 2698585801U, // <4,0,2,3>: Cost 3 vsldoi8 <2,3,4,0>, <2,3,4,0>
+ 3373672105U, // <4,0,2,4>: Cost 4 vmrglw <3,0,4,2>, <2,3,0,4>
+ 3810813795U, // <4,0,2,5>: Cost 4 vsldoi8 <u,7,4,0>, <2,5,3,1>
+ 3772327866U, // <4,0,2,6>: Cost 4 vsldoi8 <2,3,4,0>, <2,6,3,7>
+ 3386280568U, // <4,0,2,7>: Cost 5 vmrglw <5,1,4,2>, <3,6,0,7>
+ 2701903966U, // <4,0,2,u>: Cost 3 vsldoi8 <2,u,4,0>, <2,u,4,0>
+ 3699638374U, // <4,0,3,0>: Cost 4 vsldoi4 <1,4,0,3>, LHS
+ 2753560832U, // <4,0,3,1>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 3772328276U, // <4,0,3,2>: Cost 4 vsldoi8 <2,3,4,0>, <3,2,4,3>
+ 3827302674U, // <4,0,3,3>: Cost 4 vsldoi12 <0,3,1,4>, <0,3,3,4>
+ 3699641654U, // <4,0,3,4>: Cost 4 vsldoi4 <1,4,0,3>, RHS
+ 3779627588U, // <4,0,3,5>: Cost 4 vsldoi8 <3,5,4,0>, <3,5,4,0>
+ 3772328604U, // <4,0,3,6>: Cost 4 vsldoi8 <2,3,4,0>, <3,6,4,7>
+ 3780954854U, // <4,0,3,7>: Cost 4 vsldoi8 <3,7,4,0>, <3,7,4,0>
+ 2753560832U, // <4,0,3,u>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 2725129106U, // <4,0,4,0>: Cost 3 vsldoi8 <6,7,4,0>, <4,0,5,1>
+ 1167720550U, // <4,0,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 3839172953U, // <4,0,4,2>: Cost 4 vsldoi12 <2,3,0,4>, <0,4,2,3>
+ 3772329051U, // <4,0,4,3>: Cost 4 vsldoi8 <2,3,4,0>, <4,3,0,4>
+ 2241462610U, // <4,0,4,4>: Cost 3 vmrghw <4,4,4,4>, <0,4,1,5>
+ 2698587446U, // <4,0,4,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 3772329297U, // <4,0,4,6>: Cost 4 vsldoi8 <2,3,4,0>, <4,6,0,7>
+ 3735483703U, // <4,0,4,7>: Cost 4 vsldoi4 <7,4,0,4>, <7,4,0,4>
+ 1167721117U, // <4,0,4,u>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 1168556032U, // <4,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 94814310U, // <4,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2242298029U, // <4,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2637859284U, // <4,0,5,3>: Cost 3 vsldoi4 <3,4,0,5>, <3,4,0,5>
+ 1168556370U, // <4,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2242306530U, // <4,0,5,5>: Cost 3 vmrghw RHS, <0,5,u,5>
+ 2242298358U, // <4,0,5,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661750072U, // <4,0,5,7>: Cost 3 vsldoi4 <7,4,0,5>, <7,4,0,5>
+ 94814877U, // <4,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 3316580362U, // <4,0,6,0>: Cost 4 vmrghw <4,6,5,1>, <0,0,1,1>
+ 2242846822U, // <4,0,6,1>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3798872570U, // <4,0,6,2>: Cost 4 vsldoi8 <6,7,4,0>, <6,2,7,3>
+ 3796218413U, // <4,0,6,3>: Cost 4 vsldoi8 <6,3,4,0>, <6,3,4,0>
+ 3834528273U, // <4,0,6,4>: Cost 4 vsldoi12 <1,5,0,4>, <0,6,4,7>
+ 3798872811U, // <4,0,6,5>: Cost 4 vsldoi8 <6,7,4,0>, <6,5,7,1>
+ 3316621876U, // <4,0,6,6>: Cost 4 vmrghw <4,6,5,6>, <0,6,u,6>
+ 2725131121U, // <4,0,6,7>: Cost 3 vsldoi8 <6,7,4,0>, <6,7,4,0>
+ 2242847389U, // <4,0,6,u>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3377692672U, // <4,0,7,0>: Cost 4 vmrglw <3,6,4,7>, <0,0,0,0>
+ 2243493990U, // <4,0,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 3775648970U, // <4,0,7,2>: Cost 5 vsldoi8 <2,u,4,0>, <7,2,6,3>
+ 3802191110U, // <4,0,7,3>: Cost 4 vsldoi8 <7,3,4,0>, <7,3,4,0>
+ 3317236050U, // <4,0,7,4>: Cost 4 vmrghw <4,7,5,0>, <0,4,1,5>
+ 3803518376U, // <4,0,7,5>: Cost 4 vsldoi8 <7,5,4,0>, <7,5,4,0>
+ 3317236214U, // <4,0,7,6>: Cost 5 vmrghw <4,7,5,0>, <0,6,1,7>
+ 3798873708U, // <4,0,7,7>: Cost 4 vsldoi8 <6,7,4,0>, <7,7,7,7>
+ 2243494557U, // <4,0,7,u>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 1170546688U, // <4,0,u,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 96804966U, // <4,0,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685275293U, // <4,0,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637883863U, // <4,0,u,3>: Cost 3 vsldoi4 <3,4,0,u>, <3,4,0,u>
+ 1170547026U, // <4,0,u,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2698590362U, // <4,0,u,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 2244289014U, // <4,0,u,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661774651U, // <4,0,u,7>: Cost 3 vsldoi4 <7,4,0,u>, <7,4,0,u>
+ 96805533U, // <4,0,u,u>: Cost 1 vmrghw RHS, LHS
+ 2667749478U, // <4,1,0,0>: Cost 3 vsldoi4 <u,4,1,0>, LHS
+ 2689966182U, // <4,1,0,1>: Cost 3 vsldoi8 <0,u,4,1>, LHS
+ 2238571418U, // <4,1,0,2>: Cost 3 vmrghw <4,0,1,2>, <1,2,3,4>
+ 3711633880U, // <4,1,0,3>: Cost 4 vsldoi4 <3,4,1,0>, <3,4,1,0>
+ 2689966418U, // <4,1,0,4>: Cost 3 vsldoi8 <0,u,4,1>, <0,4,1,5>
+ 3361046866U, // <4,1,0,5>: Cost 4 vmrglw <0,u,4,0>, <0,4,1,5>
+ 3741495802U, // <4,1,0,6>: Cost 4 vsldoi4 <u,4,1,0>, <6,2,7,3>
+ 3741496314U, // <4,1,0,7>: Cost 4 vsldoi4 <u,4,1,0>, <7,0,1,2>
+ 2689966765U, // <4,1,0,u>: Cost 3 vsldoi8 <0,u,4,1>, <0,u,4,1>
+ 3764372222U, // <4,1,1,0>: Cost 4 vsldoi8 <1,0,4,1>, <1,0,4,1>
+ 2758206263U, // <4,1,1,1>: Cost 3 vsldoi12 <1,1,1,4>, <1,1,1,4>
+ 2698593178U, // <4,1,1,2>: Cost 3 vsldoi8 <2,3,4,1>, <1,2,3,4>
+ 3361057810U, // <4,1,1,3>: Cost 4 vmrglw <0,u,4,1>, <4,2,1,3>
+ 3827303250U, // <4,1,1,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,1,4,4>
+ 2287313234U, // <4,1,1,5>: Cost 3 vmrglw <0,u,4,1>, <0,4,1,5>
+ 3763709171U, // <4,1,1,6>: Cost 4 vsldoi8 <0,u,4,1>, <1,6,5,7>
+ 3361058138U, // <4,1,1,7>: Cost 4 vmrglw <0,u,4,1>, <4,6,1,7>
+ 2239759744U, // <4,1,1,u>: Cost 3 vmrghw <4,1,u,3>, <1,u,3,4>
+ 2637906022U, // <4,1,2,0>: Cost 3 vsldoi4 <3,4,1,2>, LHS
+ 2637906842U, // <4,1,2,1>: Cost 3 vsldoi4 <3,4,1,2>, <1,2,3,4>
+ 3763709544U, // <4,1,2,2>: Cost 4 vsldoi8 <0,u,4,1>, <2,2,2,2>
+ 1685275546U, // <4,1,2,3>: Cost 2 vsldoi12 <1,2,3,4>, <1,2,3,4>
+ 2637909302U, // <4,1,2,4>: Cost 3 vsldoi4 <3,4,1,2>, RHS
+ 3361063250U, // <4,1,2,5>: Cost 4 vmrglw <0,u,4,2>, <0,4,1,5>
+ 3763709882U, // <4,1,2,6>: Cost 4 vsldoi8 <0,u,4,1>, <2,6,3,7>
+ 3735541054U, // <4,1,2,7>: Cost 4 vsldoi4 <7,4,1,2>, <7,4,1,2>
+ 1685644231U, // <4,1,2,u>: Cost 2 vsldoi12 <1,2,u,4>, <1,2,u,4>
+ 2702575792U, // <4,1,3,0>: Cost 3 vsldoi8 <3,0,4,1>, <3,0,4,1>
+ 3832759257U, // <4,1,3,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,3,1,4>
+ 3833349090U, // <4,1,3,2>: Cost 4 vsldoi12 <1,3,2,4>, <1,3,2,4>
+ 3763710364U, // <4,1,3,3>: Cost 4 vsldoi8 <0,u,4,1>, <3,3,3,3>
+ 2707884546U, // <4,1,3,4>: Cost 3 vsldoi8 <3,u,4,1>, <3,4,5,6>
+ 3361071442U, // <4,1,3,5>: Cost 4 vmrglw <0,u,4,3>, <0,4,1,5>
+ 3772336796U, // <4,1,3,6>: Cost 4 vsldoi8 <2,3,4,1>, <3,6,4,7>
+ 3775654595U, // <4,1,3,7>: Cost 5 vsldoi8 <2,u,4,1>, <3,7,0,1>
+ 2707884856U, // <4,1,3,u>: Cost 3 vsldoi8 <3,u,4,1>, <3,u,4,1>
+ 2667782246U, // <4,1,4,0>: Cost 3 vsldoi4 <u,4,1,4>, LHS
+ 2241463092U, // <4,1,4,1>: Cost 3 vmrghw <4,4,4,4>, <1,1,1,1>
+ 2241553306U, // <4,1,4,2>: Cost 3 vmrghw <4,4,5,6>, <1,2,3,4>
+ 3827303484U, // <4,1,4,3>: Cost 4 vsldoi12 <0,3,1,4>, <1,4,3,4>
+ 2667785424U, // <4,1,4,4>: Cost 3 vsldoi4 <u,4,1,4>, <4,4,4,4>
+ 2689969462U, // <4,1,4,5>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 3763711322U, // <4,1,4,6>: Cost 4 vsldoi8 <0,u,4,1>, <4,6,1,7>
+ 3867116636U, // <4,1,4,7>: Cost 4 vsldoi12 <7,0,1,4>, <1,4,7,0>
+ 2689969705U, // <4,1,4,u>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 1546273106U, // <4,1,5,0>: Cost 2 vsldoi4 <0,4,1,5>, <0,4,1,5>
+ 1168556852U, // <4,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1168556950U, // <4,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2620016790U, // <4,1,5,3>: Cost 3 vsldoi4 <0,4,1,5>, <3,0,1,2>
+ 1546276150U, // <4,1,5,4>: Cost 2 vsldoi4 <0,4,1,5>, RHS
+ 2620018692U, // <4,1,5,5>: Cost 3 vsldoi4 <0,4,1,5>, <5,5,5,5>
+ 2242299087U, // <4,1,5,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667795450U, // <4,1,5,7>: Cost 3 vsldoi4 <u,4,1,5>, <7,0,1,2>
+ 1546278702U, // <4,1,5,u>: Cost 2 vsldoi4 <0,4,1,5>, LHS
+ 3781628193U, // <4,1,6,0>: Cost 4 vsldoi8 <3,u,4,1>, <6,0,1,2>
+ 3832759503U, // <4,1,6,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,1,7>
+ 3316261786U, // <4,1,6,2>: Cost 4 vmrghw <4,6,0,7>, <1,2,3,4>
+ 3781628466U, // <4,1,6,3>: Cost 4 vsldoi8 <3,u,4,1>, <6,3,4,5>
+ 3827303658U, // <4,1,6,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,6,4,7>
+ 3361096018U, // <4,1,6,5>: Cost 4 vmrglw <0,u,4,6>, <0,4,1,5>
+ 3788264248U, // <4,1,6,6>: Cost 4 vsldoi8 <5,0,4,1>, <6,6,6,6>
+ 3788264270U, // <4,1,6,7>: Cost 4 vsldoi8 <5,0,4,1>, <6,7,0,1>
+ 3832759566U, // <4,1,6,u>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,u,7>
+ 2726466580U, // <4,1,7,0>: Cost 3 vsldoi8 <7,0,4,1>, <7,0,4,1>
+ 3377692682U, // <4,1,7,1>: Cost 4 vmrglw <3,6,4,7>, <0,0,1,1>
+ 3377694870U, // <4,1,7,2>: Cost 4 vmrglw <3,6,4,7>, <3,0,1,2>
+ 3802199303U, // <4,1,7,3>: Cost 4 vsldoi8 <7,3,4,1>, <7,3,4,1>
+ 2731775334U, // <4,1,7,4>: Cost 3 vsldoi8 <7,u,4,1>, <7,4,5,6>
+ 3377693010U, // <4,1,7,5>: Cost 4 vmrglw <3,6,4,7>, <0,4,1,5>
+ 3365749804U, // <4,1,7,6>: Cost 5 vmrglw <1,6,4,7>, <1,4,1,6>
+ 3788265068U, // <4,1,7,7>: Cost 4 vsldoi8 <5,0,4,1>, <7,7,7,7>
+ 2731775644U, // <4,1,7,u>: Cost 3 vsldoi8 <7,u,4,1>, <7,u,4,1>
+ 1546297685U, // <4,1,u,0>: Cost 2 vsldoi4 <0,4,1,u>, <0,4,1,u>
+ 1170547508U, // <4,1,u,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1170547606U, // <4,1,u,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 1689257344U, // <4,1,u,3>: Cost 2 vsldoi12 <1,u,3,4>, <1,u,3,4>
+ 1546300726U, // <4,1,u,4>: Cost 2 vsldoi4 <0,4,1,u>, RHS
+ 2284716370U, // <4,1,u,5>: Cost 3 vmrglw <0,4,4,u>, <0,4,1,5>
+ 2244289743U, // <4,1,u,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667820026U, // <4,1,u,7>: Cost 3 vsldoi4 <u,4,1,u>, <7,0,1,2>
+ 1546303278U, // <4,1,u,u>: Cost 2 vsldoi4 <0,4,1,u>, LHS
+ 3729621094U, // <4,2,0,0>: Cost 4 vsldoi4 <6,4,2,0>, LHS
+ 3763716198U, // <4,2,0,1>: Cost 4 vsldoi8 <0,u,4,2>, LHS
+ 2238858856U, // <4,2,0,2>: Cost 3 vmrghw <4,0,5,1>, <2,2,2,2>
+ 2295930982U, // <4,2,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3763716434U, // <4,2,0,4>: Cost 4 vsldoi8 <0,u,4,2>, <0,4,1,5>
+ 2238859107U, // <4,2,0,5>: Cost 3 vmrghw <4,0,5,1>, <2,5,3,1>
+ 2238859194U, // <4,2,0,6>: Cost 3 vmrghw <4,0,5,1>, <2,6,3,7>
+ 3312601066U, // <4,2,0,7>: Cost 4 vmrghw <4,0,5,1>, <2,7,0,1>
+ 2295930987U, // <4,2,0,u>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3699769446U, // <4,2,1,0>: Cost 4 vsldoi4 <1,4,2,1>, LHS
+ 3313255971U, // <4,2,1,1>: Cost 4 vmrghw <4,1,5,0>, <2,1,3,5>
+ 3361056360U, // <4,2,1,2>: Cost 4 vmrglw <0,u,4,1>, <2,2,2,2>
+ 2287312998U, // <4,2,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3788932148U, // <4,2,1,4>: Cost 4 vsldoi8 <5,1,4,2>, <1,4,2,5>
+ 3313256290U, // <4,2,1,5>: Cost 4 vmrghw <4,1,5,0>, <2,5,3,0>
+ 3838289469U, // <4,2,1,6>: Cost 4 vsldoi12 <2,1,6,4>, <2,1,6,4>
+ 3369682865U, // <4,2,1,7>: Cost 5 vmrglw <2,3,4,1>, <2,6,2,7>
+ 2287313003U, // <4,2,1,u>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3838658133U, // <4,2,2,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,0,1>
+ 3711722394U, // <4,2,2,1>: Cost 4 vsldoi4 <3,4,2,2>, <1,2,3,4>
+ 2759018088U, // <4,2,2,2>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,2,2>
+ 2759018098U, // <4,2,2,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,3,3>
+ 3838658168U, // <4,2,2,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,4,0>
+ 3369027341U, // <4,2,2,5>: Cost 4 vmrglw <2,2,4,2>, <2,4,2,5>
+ 2240227258U, // <4,2,2,6>: Cost 3 vmrghw <4,2,5,6>, <2,6,3,7>
+ 3735614791U, // <4,2,2,7>: Cost 4 vsldoi4 <7,4,2,2>, <7,4,2,2>
+ 2759018143U, // <4,2,2,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,u,3>
+ 2759018150U, // <4,2,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,0,1>
+ 3831948975U, // <4,2,3,1>: Cost 4 vsldoi12 <1,1,1,4>, <2,3,1,1>
+ 3832759993U, // <4,2,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <2,3,2,2>
+ 2759018180U, // <4,2,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,3,4>
+ 2759018185U, // <4,2,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,4,0>
+ 3839542998U, // <4,2,3,5>: Cost 4 vsldoi12 <2,3,5,4>, <2,3,5,4>
+ 3314640826U, // <4,2,3,6>: Cost 4 vmrghw <4,3,5,7>, <2,6,3,7>
+ 2765948648U, // <4,2,3,7>: Cost 3 vsldoi12 <2,3,7,4>, <2,3,7,4>
+ 2759018222U, // <4,2,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,u,1>
+ 3838658295U, // <4,2,4,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,4,0,1>
+ 3315205667U, // <4,2,4,1>: Cost 4 vmrghw <4,4,4,4>, <2,1,3,5>
+ 2241463912U, // <4,2,4,2>: Cost 3 vmrghw <4,4,4,4>, <2,2,2,2>
+ 1234829414U, // <4,2,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2241464085U, // <4,2,4,4>: Cost 3 vmrghw <4,4,4,4>, <2,4,3,4>
+ 2241546087U, // <4,2,4,5>: Cost 3 vmrghw <4,4,5,5>, <2,5,3,5>
+ 2241464250U, // <4,2,4,6>: Cost 3 vmrghw <4,4,4,4>, <2,6,3,7>
+ 3741602873U, // <4,2,4,7>: Cost 4 vsldoi4 <u,4,2,4>, <7,0,u,2>
+ 1234829419U, // <4,2,4,u>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2626060390U, // <4,2,5,0>: Cost 3 vsldoi4 <1,4,2,5>, LHS
+ 2626061364U, // <4,2,5,1>: Cost 3 vsldoi4 <1,4,2,5>, <1,4,2,5>
+ 1168557672U, // <4,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222230118U, // <4,2,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 2626063670U, // <4,2,5,4>: Cost 3 vsldoi4 <1,4,2,5>, RHS
+ 2242299752U, // <4,2,5,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1168558010U, // <4,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2242299882U, // <4,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1222230123U, // <4,2,5,u>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 3711754342U, // <4,2,6,0>: Cost 4 vsldoi4 <3,4,2,6>, LHS
+ 3711755162U, // <4,2,6,1>: Cost 4 vsldoi4 <3,4,2,6>, <1,2,3,4>
+ 3838658481U, // <4,2,6,2>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,2,7>
+ 2759018426U, // <4,2,6,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,3,7>
+ 3838658499U, // <4,2,6,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,4,7>
+ 3735646310U, // <4,2,6,5>: Cost 4 vsldoi4 <7,4,2,6>, <5,6,7,4>
+ 3316590522U, // <4,2,6,6>: Cost 4 vmrghw <4,6,5,2>, <2,6,3,7>
+ 3798889331U, // <4,2,6,7>: Cost 4 vsldoi8 <6,7,4,2>, <6,7,4,2>
+ 2759018471U, // <4,2,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,u,7>
+ 3874564074U, // <4,2,7,0>: Cost 4 vsldoi12 <u,2,3,4>, <2,7,0,1>
+ 3800880230U, // <4,2,7,1>: Cost 4 vsldoi8 <7,1,4,2>, <7,1,4,2>
+ 3371722344U, // <4,2,7,2>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,2>
+ 2303950950U, // <4,2,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 3371722346U, // <4,2,7,4>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,4>
+ 3371722509U, // <4,2,7,5>: Cost 5 vmrglw <2,6,4,7>, <2,4,2,5>
+ 3317237690U, // <4,2,7,6>: Cost 4 vmrghw <4,7,5,0>, <2,6,3,7>
+ 3317237738U, // <4,2,7,7>: Cost 4 vmrghw <4,7,5,0>, <2,7,0,1>
+ 2303950955U, // <4,2,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2759018555U, // <4,2,u,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,0,1>
+ 2626085943U, // <4,2,u,1>: Cost 3 vsldoi4 <1,4,2,u>, <1,4,2,u>
+ 1170548328U, // <4,2,u,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222254694U, // <4,2,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2759018595U, // <4,2,u,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,4,5>
+ 2244290408U, // <4,2,u,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1170548666U, // <4,2,u,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2769266813U, // <4,2,u,7>: Cost 3 vsldoi12 <2,u,7,4>, <2,u,7,4>
+ 1222254699U, // <4,2,u,u>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2238859414U, // <4,3,0,0>: Cost 3 vmrghw <4,0,5,1>, <3,0,1,2>
+ 2759018646U, // <4,3,0,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,1,2>
+ 3312314708U, // <4,3,0,2>: Cost 4 vmrghw <4,0,1,2>, <3,2,4,3>
+ 2238859676U, // <4,3,0,3>: Cost 3 vmrghw <4,0,5,1>, <3,3,3,3>
+ 2295931802U, // <4,3,0,4>: Cost 3 vmrglw <2,3,4,0>, <1,2,3,4>
+ 3735670886U, // <4,3,0,5>: Cost 4 vsldoi4 <7,4,3,0>, <5,6,7,4>
+ 3312315036U, // <4,3,0,6>: Cost 4 vmrghw <4,0,1,2>, <3,6,4,7>
+ 3369674682U, // <4,3,0,7>: Cost 4 vmrglw <2,3,4,0>, <2,6,3,7>
+ 2759018709U, // <4,3,0,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,u,2>
+ 3361055638U, // <4,3,1,0>: Cost 4 vmrglw <0,u,4,1>, <1,2,3,0>
+ 3831949542U, // <4,3,1,1>: Cost 4 vsldoi12 <1,1,1,4>, <3,1,1,1>
+ 2703917978U, // <4,3,1,2>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3361056370U, // <4,3,1,3>: Cost 4 vmrglw <0,u,4,1>, <2,2,3,3>
+ 2295939994U, // <4,3,1,4>: Cost 3 vmrglw <2,3,4,1>, <1,2,3,4>
+ 3361056291U, // <4,3,1,5>: Cost 4 vmrglw <0,u,4,1>, <2,1,3,5>
+ 3378972520U, // <4,3,1,6>: Cost 4 vmrglw <3,u,4,1>, <2,5,3,6>
+ 3361056698U, // <4,3,1,7>: Cost 4 vmrglw <0,u,4,1>, <2,6,3,7>
+ 2703917978U, // <4,3,1,u>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3832760624U, // <4,3,2,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,0,3>
+ 3711796122U, // <4,3,2,1>: Cost 4 vsldoi4 <3,4,3,2>, <1,2,3,4>
+ 3832760641U, // <4,3,2,2>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,2,2>
+ 2770962764U, // <4,3,2,3>: Cost 3 vsldoi12 <3,2,3,4>, <3,2,3,4>
+ 2759018836U, // <4,3,2,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,2,4,3>
+ 3827304802U, // <4,3,2,5>: Cost 5 vsldoi12 <0,3,1,4>, <3,2,5,u>
+ 3832760678U, // <4,3,2,6>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,6,3>
+ 3859597679U, // <4,3,2,7>: Cost 4 vsldoi12 <5,6,7,4>, <3,2,7,3>
+ 2771331449U, // <4,3,2,u>: Cost 3 vsldoi12 <3,2,u,4>, <3,2,u,4>
+ 2240841878U, // <4,3,3,0>: Cost 3 vmrghw <4,3,5,0>, <3,0,1,2>
+ 3776997635U, // <4,3,3,1>: Cost 4 vsldoi8 <3,1,4,3>, <3,1,4,3>
+ 2703919444U, // <4,3,3,2>: Cost 3 vsldoi8 <3,2,4,3>, <3,2,4,3>
+ 2759018908U, // <4,3,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,3,3>
+ 2759018918U, // <4,3,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,4,4>
+ 3386951446U, // <4,3,3,5>: Cost 4 vmrglw <5,2,4,3>, <2,4,3,5>
+ 3777661596U, // <4,3,3,6>: Cost 4 vsldoi8 <3,2,4,3>, <3,6,4,7>
+ 3375007674U, // <4,3,3,7>: Cost 4 vmrglw <3,2,4,3>, <2,6,3,7>
+ 2707901242U, // <4,3,3,u>: Cost 3 vsldoi8 <3,u,4,3>, <3,u,4,3>
+ 2759018960U, // <4,3,4,0>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,0,1>
+ 2759018970U, // <4,3,4,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,1,2>
+ 2632099605U, // <4,3,4,2>: Cost 3 vsldoi4 <2,4,3,4>, <2,4,3,4>
+ 2241464732U, // <4,3,4,3>: Cost 3 vmrghw <4,4,4,4>, <3,3,3,3>
+ 2759019000U, // <4,3,4,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,4,5>
+ 2753563138U, // <4,3,4,5>: Cost 3 vsldoi12 <0,3,1,4>, <3,4,5,6>
+ 3777662316U, // <4,3,4,6>: Cost 4 vsldoi8 <3,2,4,3>, <4,6,3,7>
+ 2308573114U, // <4,3,4,7>: Cost 3 vmrglw <4,4,4,4>, <2,6,3,7>
+ 2759019032U, // <4,3,4,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,u,1>
+ 1168558230U, // <4,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2242300134U, // <4,3,5,1>: Cost 3 vmrghw RHS, <3,1,1,1>
+ 2632107798U, // <4,3,5,2>: Cost 3 vsldoi4 <2,4,3,5>, <2,4,3,5>
+ 1168558492U, // <4,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1168558594U, // <4,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2295973654U, // <4,3,5,5>: Cost 3 vmrglw <2,3,4,5>, <2,4,3,5>
+ 2242300536U, // <4,3,5,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295973818U, // <4,3,5,7>: Cost 3 vmrglw <2,3,4,5>, <2,6,3,7>
+ 1168558878U, // <4,3,5,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 3832760952U, // <4,3,6,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,6,0,7>
+ 3711828890U, // <4,3,6,1>: Cost 4 vsldoi4 <3,4,3,6>, <1,2,3,4>
+ 3316484436U, // <4,3,6,2>: Cost 4 vmrghw <4,6,3,7>, <3,2,4,3>
+ 3711830512U, // <4,3,6,3>: Cost 4 vsldoi4 <3,4,3,6>, <3,4,3,6>
+ 2759019164U, // <4,3,6,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3361097251U, // <4,3,6,5>: Cost 5 vmrglw <0,u,4,6>, <2,1,3,5>
+ 3316624045U, // <4,3,6,6>: Cost 4 vmrghw <4,6,5,6>, <3,6,6,6>
+ 2773912244U, // <4,3,6,7>: Cost 3 vsldoi12 <3,6,7,4>, <3,6,7,4>
+ 2759019164U, // <4,3,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3377693590U, // <4,3,7,0>: Cost 4 vmrglw <3,6,4,7>, <1,2,3,0>
+ 3365751680U, // <4,3,7,1>: Cost 5 vmrglw <1,6,4,7>, <4,0,3,1>
+ 2727810232U, // <4,3,7,2>: Cost 3 vsldoi8 <7,2,4,3>, <7,2,4,3>
+ 3377694322U, // <4,3,7,3>: Cost 4 vmrglw <3,6,4,7>, <2,2,3,3>
+ 2303951770U, // <4,3,7,4>: Cost 3 vmrglw <3,6,4,7>, <1,2,3,4>
+ 3741700198U, // <4,3,7,5>: Cost 4 vsldoi4 <u,4,3,7>, <5,6,7,4>
+ 3377695216U, // <4,3,7,6>: Cost 4 vmrglw <3,6,4,7>, <3,4,3,6>
+ 3375703994U, // <4,3,7,7>: Cost 4 vmrglw <3,3,4,7>, <2,6,3,7>
+ 2731792030U, // <4,3,7,u>: Cost 3 vsldoi8 <7,u,4,3>, <7,u,4,3>
+ 1170548886U, // <4,3,u,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2759019294U, // <4,3,u,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,1,2>
+ 2632132377U, // <4,3,u,2>: Cost 3 vsldoi4 <2,4,3,u>, <2,4,3,u>
+ 1170549148U, // <4,3,u,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1170549250U, // <4,3,u,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2759019334U, // <4,3,u,5>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,5,6>
+ 2244291192U, // <4,3,u,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295998394U, // <4,3,u,7>: Cost 3 vmrglw <2,3,4,u>, <2,6,3,7>
+ 1170549534U, // <4,3,u,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 1165118354U, // <4,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1637482598U, // <4,4,0,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 3711854285U, // <4,4,0,2>: Cost 4 vsldoi4 <3,4,4,0>, <2,3,4,4>
+ 3827305344U, // <4,4,0,3>: Cost 4 vsldoi12 <0,3,1,4>, <4,0,3,1>
+ 2711224658U, // <4,4,0,4>: Cost 3 vsldoi8 <4,4,4,4>, <0,4,1,5>
+ 1165118774U, // <4,4,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3312602489U, // <4,4,0,6>: Cost 4 vmrghw <4,0,5,1>, <4,6,5,2>
+ 3369675420U, // <4,4,0,7>: Cost 4 vmrglw <2,3,4,0>, <3,6,4,7>
+ 1165119017U, // <4,4,0,u>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3369682633U, // <4,4,1,0>: Cost 4 vmrglw <2,3,4,1>, <2,3,4,0>
+ 2287313581U, // <4,4,1,1>: Cost 3 vmrglw <0,u,4,1>, <0,u,4,1>
+ 2759019466U, // <4,4,1,2>: Cost 3 vsldoi12 <1,2,3,4>, <4,1,2,3>
+ 3369683284U, // <4,4,1,3>: Cost 4 vmrglw <2,3,4,1>, <3,2,4,3>
+ 2311204048U, // <4,4,1,4>: Cost 3 vmrglw <4,u,4,1>, <4,4,4,4>
+ 2239319350U, // <4,4,1,5>: Cost 3 vmrghw <4,1,2,3>, RHS
+ 3784967411U, // <4,4,1,6>: Cost 4 vsldoi8 <4,4,4,4>, <1,6,5,7>
+ 3369683612U, // <4,4,1,7>: Cost 4 vmrglw <2,3,4,1>, <3,6,4,7>
+ 2763000832U, // <4,4,1,u>: Cost 3 vsldoi12 <1,u,3,4>, <4,1,u,3>
+ 3711869030U, // <4,4,2,0>: Cost 4 vsldoi4 <3,4,4,2>, LHS
+ 3711869850U, // <4,4,2,1>: Cost 4 vsldoi4 <3,4,4,2>, <1,2,3,4>
+ 2240203830U, // <4,4,2,2>: Cost 3 vmrghw <4,2,5,3>, <4,2,5,3>
+ 2698618573U, // <4,4,2,3>: Cost 3 vsldoi8 <2,3,4,4>, <2,3,4,4>
+ 2711226133U, // <4,4,2,4>: Cost 3 vsldoi8 <4,4,4,4>, <2,4,3,4>
+ 2240204086U, // <4,4,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2711226298U, // <4,4,2,6>: Cost 3 vsldoi8 <4,4,4,4>, <2,6,3,7>
+ 3832761416U, // <4,4,2,7>: Cost 4 vsldoi12 <1,2,3,4>, <4,2,7,3>
+ 2701936738U, // <4,4,2,u>: Cost 3 vsldoi8 <2,u,4,4>, <2,u,4,4>
+ 2711226518U, // <4,4,3,0>: Cost 3 vsldoi8 <4,4,4,4>, <3,0,1,2>
+ 3777005828U, // <4,4,3,1>: Cost 4 vsldoi8 <3,1,4,4>, <3,1,4,4>
+ 3832761453U, // <4,4,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,2,4>
+ 2301266260U, // <4,4,3,3>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 2705254903U, // <4,4,3,4>: Cost 3 vsldoi8 <3,4,4,4>, <3,4,4,4>
+ 2240843062U, // <4,4,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 3832761489U, // <4,4,3,6>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,6,4>
+ 3375008412U, // <4,4,3,7>: Cost 4 vmrglw <3,2,4,3>, <3,6,4,7>
+ 2301266260U, // <4,4,3,u>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 1570373734U, // <4,4,4,0>: Cost 2 vsldoi4 <4,4,4,4>, LHS
+ 2308574089U, // <4,4,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,4,1>
+ 2644117096U, // <4,4,4,2>: Cost 3 vsldoi4 <4,4,4,4>, <2,2,2,2>
+ 2638146039U, // <4,4,4,3>: Cost 3 vsldoi4 <3,4,4,4>, <3,4,4,4>
+ 229035318U, // <4,4,4,4>: Cost 1 vspltisw0 RHS
+ 1167723830U, // <4,4,4,5>: Cost 2 vmrghw <4,4,4,4>, RHS
+ 2644120058U, // <4,4,4,6>: Cost 3 vsldoi4 <4,4,4,4>, <6,2,7,3>
+ 2662036827U, // <4,4,4,7>: Cost 3 vsldoi4 <7,4,4,4>, <7,4,4,4>
+ 229035318U, // <4,4,4,u>: Cost 1 vspltisw0 RHS
+ 1168558994U, // <4,4,5,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 2638152602U, // <4,4,5,1>: Cost 3 vsldoi4 <3,4,4,5>, <1,2,3,4>
+ 2242300981U, // <4,4,5,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638154232U, // <4,4,5,3>: Cost 3 vsldoi4 <3,4,4,5>, <3,4,4,5>
+ 1168559322U, // <4,4,5,4>: Cost 2 vmrghw RHS, <4,4,5,5>
+ 94817590U, // <4,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685278006U, // <4,4,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2242309576U, // <4,4,5,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 94817833U, // <4,4,5,u>: Cost 1 vmrghw RHS, RHS
+ 3316591506U, // <4,4,6,0>: Cost 4 vmrghw <4,6,5,2>, <4,0,5,1>
+ 3758428587U, // <4,4,6,1>: Cost 4 vsldoi8 <0,0,4,4>, <6,1,7,5>
+ 2711228922U, // <4,4,6,2>: Cost 3 vsldoi8 <4,4,4,4>, <6,2,7,3>
+ 3796251185U, // <4,4,6,3>: Cost 4 vsldoi8 <6,3,4,4>, <6,3,4,4>
+ 2711229085U, // <4,4,6,4>: Cost 3 vsldoi8 <4,4,4,4>, <6,4,7,4>
+ 2242850102U, // <4,4,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2242850169U, // <4,4,6,6>: Cost 3 vmrghw <4,6,5,2>, <4,6,5,2>
+ 2725163893U, // <4,4,6,7>: Cost 3 vsldoi8 <6,7,4,4>, <6,7,4,4>
+ 2242850345U, // <4,4,6,u>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2711229434U, // <4,4,7,0>: Cost 3 vsldoi8 <4,4,4,4>, <7,0,1,2>
+ 3377694410U, // <4,4,7,1>: Cost 4 vmrglw <3,6,4,7>, <2,3,4,1>
+ 3868593584U, // <4,4,7,2>: Cost 4 vsldoi12 <7,2,3,4>, <4,7,2,3>
+ 3377695060U, // <4,4,7,3>: Cost 4 vmrglw <3,6,4,7>, <3,2,4,3>
+ 2729145691U, // <4,4,7,4>: Cost 3 vsldoi8 <7,4,4,4>, <7,4,4,4>
+ 2243497270U, // <4,4,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 3871542744U, // <4,4,7,6>: Cost 4 vsldoi12 <7,6,7,4>, <4,7,6,7>
+ 2303953564U, // <4,4,7,7>: Cost 3 vmrglw <3,6,4,7>, <3,6,4,7>
+ 2243497513U, // <4,4,7,u>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 1170549650U, // <4,4,u,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 1637488430U, // <4,4,u,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 2244291637U, // <4,4,u,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638178811U, // <4,4,u,3>: Cost 3 vsldoi4 <3,4,4,u>, <3,4,4,u>
+ 229035318U, // <4,4,u,4>: Cost 1 vspltisw0 RHS
+ 96808246U, // <4,4,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685278249U, // <4,4,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2244292040U, // <4,4,u,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 96808489U, // <4,4,u,u>: Cost 1 vmrghw RHS, RHS
+ 2698625024U, // <4,5,0,0>: Cost 3 vsldoi8 <2,3,4,5>, <0,0,0,0>
+ 1624883302U, // <4,5,0,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2638186190U, // <4,5,0,2>: Cost 3 vsldoi4 <3,4,5,0>, <2,3,4,5>
+ 2638187004U, // <4,5,0,3>: Cost 3 vsldoi4 <3,4,5,0>, <3,4,5,0>
+ 2687345005U, // <4,5,0,4>: Cost 3 vsldoi8 <0,4,4,5>, <0,4,4,5>
+ 2238861316U, // <4,5,0,5>: Cost 3 vmrghw <4,0,5,1>, <5,5,5,5>
+ 2662077302U, // <4,5,0,6>: Cost 3 vsldoi4 <7,4,5,0>, <6,7,4,5>
+ 2662077792U, // <4,5,0,7>: Cost 3 vsldoi4 <7,4,5,0>, <7,4,5,0>
+ 1624883869U, // <4,5,0,u>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 3361057762U, // <4,5,1,0>: Cost 4 vmrglw <0,u,4,1>, <4,1,5,0>
+ 2691326803U, // <4,5,1,1>: Cost 3 vsldoi8 <1,1,4,5>, <1,1,4,5>
+ 2698625942U, // <4,5,1,2>: Cost 3 vsldoi8 <2,3,4,5>, <1,2,3,0>
+ 3361055659U, // <4,5,1,3>: Cost 4 vmrglw <0,u,4,1>, <1,2,5,3>
+ 3761087567U, // <4,5,1,4>: Cost 4 vsldoi8 <0,4,4,5>, <1,4,5,5>
+ 2693981335U, // <4,5,1,5>: Cost 3 vsldoi8 <1,5,4,5>, <1,5,4,5>
+ 2305231362U, // <4,5,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 3361055987U, // <4,5,1,7>: Cost 4 vmrglw <0,u,4,1>, <1,6,5,7>
+ 2695972234U, // <4,5,1,u>: Cost 3 vsldoi8 <1,u,4,5>, <1,u,4,5>
+ 2638200934U, // <4,5,2,0>: Cost 3 vsldoi4 <3,4,5,2>, LHS
+ 3761088035U, // <4,5,2,1>: Cost 4 vsldoi8 <0,4,4,5>, <2,1,3,5>
+ 2697963133U, // <4,5,2,2>: Cost 3 vsldoi8 <2,2,4,5>, <2,2,4,5>
+ 1624884942U, // <4,5,2,3>: Cost 2 vsldoi8 <2,3,4,5>, <2,3,4,5>
+ 2698626838U, // <4,5,2,4>: Cost 3 vsldoi8 <2,3,4,5>, <2,4,3,5>
+ 3772368744U, // <4,5,2,5>: Cost 4 vsldoi8 <2,3,4,5>, <2,5,3,6>
+ 2698627002U, // <4,5,2,6>: Cost 3 vsldoi8 <2,3,4,5>, <2,6,3,7>
+ 3775023122U, // <4,5,2,7>: Cost 4 vsldoi8 <2,7,4,5>, <2,7,4,5>
+ 1628203107U, // <4,5,2,u>: Cost 2 vsldoi8 <2,u,4,5>, <2,u,4,5>
+ 2698627222U, // <4,5,3,0>: Cost 3 vsldoi8 <2,3,4,5>, <3,0,1,2>
+ 3765070057U, // <4,5,3,1>: Cost 4 vsldoi8 <1,1,4,5>, <3,1,1,4>
+ 2698627404U, // <4,5,3,2>: Cost 3 vsldoi8 <2,3,4,5>, <3,2,3,4>
+ 2698627484U, // <4,5,3,3>: Cost 3 vsldoi8 <2,3,4,5>, <3,3,3,3>
+ 2698627580U, // <4,5,3,4>: Cost 3 vsldoi8 <2,3,4,5>, <3,4,5,0>
+ 3779668553U, // <4,5,3,5>: Cost 4 vsldoi8 <3,5,4,5>, <3,5,4,5>
+ 2725169844U, // <4,5,3,6>: Cost 3 vsldoi8 <6,7,4,5>, <3,6,7,4>
+ 2707253995U, // <4,5,3,7>: Cost 3 vsldoi8 <3,7,4,5>, <3,7,4,5>
+ 2698627870U, // <4,5,3,u>: Cost 3 vsldoi8 <2,3,4,5>, <3,u,1,2>
+ 2638217318U, // <4,5,4,0>: Cost 3 vsldoi4 <3,4,5,4>, LHS
+ 2308574098U, // <4,5,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,5,1>
+ 2698628150U, // <4,5,4,2>: Cost 3 vsldoi8 <2,3,4,5>, <4,2,5,3>
+ 2638219776U, // <4,5,4,3>: Cost 3 vsldoi4 <3,4,5,4>, <3,4,5,4>
+ 2698628314U, // <4,5,4,4>: Cost 3 vsldoi8 <2,3,4,5>, <4,4,5,5>
+ 1624886582U, // <4,5,4,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 2698628478U, // <4,5,4,6>: Cost 3 vsldoi8 <2,3,4,5>, <4,6,5,7>
+ 2662110564U, // <4,5,4,7>: Cost 3 vsldoi4 <7,4,5,4>, <7,4,5,4>
+ 1624886825U, // <4,5,4,u>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1570455654U, // <4,5,5,0>: Cost 2 vsldoi4 <4,4,5,5>, LHS
+ 2312564250U, // <4,5,5,1>: Cost 3 vmrglw <5,1,4,5>, <4,u,5,1>
+ 2644199118U, // <4,5,5,2>: Cost 3 vsldoi4 <4,4,5,5>, <2,3,4,5>
+ 2295974966U, // <4,5,5,3>: Cost 3 vmrglw <2,3,4,5>, <4,2,5,3>
+ 1570458842U, // <4,5,5,4>: Cost 2 vsldoi4 <4,4,5,5>, <4,4,5,5>
+ 1168568324U, // <4,5,5,5>: Cost 2 vmrghw RHS, <5,5,5,5>
+ 1168568418U, // <4,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2295975294U, // <4,5,5,7>: Cost 3 vmrglw <2,3,4,5>, <4,6,5,7>
+ 1168716036U, // <4,5,5,u>: Cost 2 vmrghw RHS, <5,u,7,0>
+ 1564491878U, // <4,5,6,0>: Cost 2 vsldoi4 <3,4,5,6>, LHS
+ 2626290768U, // <4,5,6,1>: Cost 3 vsldoi4 <1,4,5,6>, <1,4,5,6>
+ 2632263465U, // <4,5,6,2>: Cost 3 vsldoi4 <2,4,5,6>, <2,4,5,6>
+ 1564494338U, // <4,5,6,3>: Cost 2 vsldoi4 <3,4,5,6>, <3,4,5,6>
+ 1564495158U, // <4,5,6,4>: Cost 2 vsldoi4 <3,4,5,6>, RHS
+ 2638237464U, // <4,5,6,5>: Cost 3 vsldoi4 <3,4,5,6>, <5,2,6,3>
+ 2656154253U, // <4,5,6,6>: Cost 3 vsldoi4 <6,4,5,6>, <6,4,5,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2725172218U, // <4,5,7,0>: Cost 3 vsldoi8 <6,7,4,5>, <7,0,1,2>
+ 3859599489U, // <4,5,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <5,7,1,4>
+ 2698630320U, // <4,5,7,2>: Cost 3 vsldoi8 <2,3,4,5>, <7,2,3,4>
+ 2728490251U, // <4,5,7,3>: Cost 3 vsldoi8 <7,3,4,5>, <7,3,4,5>
+ 2725172576U, // <4,5,7,4>: Cost 3 vsldoi8 <6,7,4,5>, <7,4,5,0>
+ 3317239812U, // <4,5,7,5>: Cost 4 vmrghw <4,7,5,0>, <5,5,5,5>
+ 2725172760U, // <4,5,7,6>: Cost 3 vsldoi8 <6,7,4,5>, <7,6,7,4>
+ 2725172844U, // <4,5,7,7>: Cost 3 vsldoi8 <6,7,4,5>, <7,7,7,7>
+ 2725172866U, // <4,5,7,u>: Cost 3 vsldoi8 <6,7,4,5>, <7,u,1,2>
+ 1564508262U, // <4,5,u,0>: Cost 2 vsldoi4 <3,4,5,u>, LHS
+ 1624889134U, // <4,5,u,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2698631045U, // <4,5,u,2>: Cost 3 vsldoi8 <2,3,4,5>, <u,2,3,0>
+ 1564510724U, // <4,5,u,3>: Cost 2 vsldoi4 <3,4,5,u>, <3,4,5,u>
+ 1564511542U, // <4,5,u,4>: Cost 2 vsldoi4 <3,4,5,u>, RHS
+ 1624889498U, // <4,5,u,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1170550882U, // <4,5,u,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 3312595285U, // <4,6,0,0>: Cost 4 vmrghw <4,0,5,0>, <6,0,7,0>
+ 3763748966U, // <4,6,0,1>: Cost 4 vsldoi8 <0,u,4,6>, LHS
+ 2238861818U, // <4,6,0,2>: Cost 3 vmrghw <4,0,5,1>, <6,2,7,3>
+ 3767730432U, // <4,6,0,3>: Cost 4 vsldoi8 <1,5,4,6>, <0,3,1,4>
+ 3763749202U, // <4,6,0,4>: Cost 4 vsldoi8 <0,u,4,6>, <0,4,1,5>
+ 2238862059U, // <4,6,0,5>: Cost 3 vmrghw <4,0,5,1>, <6,5,7,1>
+ 2238862136U, // <4,6,0,6>: Cost 3 vmrghw <4,0,5,1>, <6,6,6,6>
+ 2295934262U, // <4,6,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 2295934263U, // <4,6,0,u>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 3378973999U, // <4,6,1,0>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,0>
+ 3378974648U, // <4,6,1,1>: Cost 4 vmrglw <3,u,4,1>, <5,4,6,1>
+ 3779675034U, // <4,6,1,2>: Cost 4 vsldoi8 <3,5,4,6>, <1,2,3,4>
+ 3378974002U, // <4,6,1,3>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,3>
+ 3378974003U, // <4,6,1,4>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,4>
+ 3767731352U, // <4,6,1,5>: Cost 4 vsldoi8 <1,5,4,6>, <1,5,4,6>
+ 3378974734U, // <4,6,1,6>: Cost 4 vmrglw <3,u,4,1>, <5,5,6,6>
+ 2287316278U, // <4,6,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 2287316279U, // <4,6,1,u>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 3735904358U, // <4,6,2,0>: Cost 4 vsldoi4 <7,4,6,2>, LHS
+ 3763750435U, // <4,6,2,1>: Cost 5 vsldoi8 <0,u,4,6>, <2,1,3,5>
+ 3313938937U, // <4,6,2,2>: Cost 4 vmrghw <4,2,5,2>, <6,2,7,2>
+ 3772376782U, // <4,6,2,3>: Cost 4 vsldoi8 <2,3,4,6>, <2,3,4,5>
+ 3852890591U, // <4,6,2,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,2,4,3>
+ 3735908454U, // <4,6,2,5>: Cost 4 vsldoi4 <7,4,6,2>, <5,6,7,4>
+ 3801573306U, // <4,6,2,6>: Cost 4 vsldoi8 <7,2,4,6>, <2,6,3,7>
+ 2785858042U, // <4,6,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,7,3>
+ 2785858051U, // <4,6,2,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,u,3>
+ 3863065101U, // <4,6,3,0>: Cost 4 vsldoi12 <6,3,0,4>, <6,3,0,4>
+ 3314586024U, // <4,6,3,1>: Cost 4 vmrghw <4,3,5,0>, <6,1,7,2>
+ 3863212575U, // <4,6,3,2>: Cost 4 vsldoi12 <6,3,2,4>, <6,3,2,4>
+ 3863286312U, // <4,6,3,3>: Cost 4 vsldoi12 <6,3,3,4>, <6,3,3,4>
+ 3767732738U, // <4,6,3,4>: Cost 4 vsldoi8 <1,5,4,6>, <3,4,5,6>
+ 3779676746U, // <4,6,3,5>: Cost 4 vsldoi8 <3,5,4,6>, <3,5,4,6>
+ 3398898488U, // <4,6,3,6>: Cost 4 vmrglw <7,2,4,3>, <6,6,6,6>
+ 2301267254U, // <4,6,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2301267255U, // <4,6,3,u>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 3852890715U, // <4,6,4,0>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,0,1>
+ 3315208615U, // <4,6,4,1>: Cost 4 vmrghw <4,4,4,4>, <6,1,7,1>
+ 2241466874U, // <4,6,4,2>: Cost 3 vmrghw <4,4,4,4>, <6,2,7,3>
+ 3852890745U, // <4,6,4,3>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,3,4>
+ 2241467037U, // <4,6,4,4>: Cost 3 vmrghw <4,4,4,4>, <6,4,7,4>
+ 2241549039U, // <4,6,4,5>: Cost 3 vmrghw <4,4,5,5>, <6,5,7,5>
+ 2241467192U, // <4,6,4,6>: Cost 3 vmrghw <4,4,4,4>, <6,6,6,6>
+ 1234832694U, // <4,6,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 1234832695U, // <4,6,4,u>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 2242302241U, // <4,6,5,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2242310567U, // <4,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1168568826U, // <4,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2242302514U, // <4,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2242302605U, // <4,6,5,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2242310891U, // <4,6,5,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1168569144U, // <4,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222233398U, // <4,6,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 1222233399U, // <4,6,5,u>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 3316576545U, // <4,6,6,0>: Cost 4 vmrghw <4,6,5,0>, <6,0,1,2>
+ 3316584871U, // <4,6,6,1>: Cost 4 vmrghw <4,6,5,1>, <6,1,7,1>
+ 2242851322U, // <4,6,6,2>: Cost 3 vmrghw <4,6,5,2>, <6,2,7,3>
+ 3316601394U, // <4,6,6,3>: Cost 4 vmrghw <4,6,5,3>, <6,3,4,5>
+ 3852890916U, // <4,6,6,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,6,4,4>
+ 3316617963U, // <4,6,6,5>: Cost 4 vmrghw <4,6,5,5>, <6,5,7,1>
+ 2242884408U, // <4,6,6,6>: Cost 3 vmrghw <4,6,5,6>, <6,6,6,6>
+ 2785858370U, // <4,6,6,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,7,7>
+ 2785858379U, // <4,6,6,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,u,7>
+ 2785858382U, // <4,6,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,0,1>
+ 3859600215U, // <4,6,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <6,7,1,1>
+ 3317240314U, // <4,6,7,2>: Cost 4 vmrghw <4,7,5,0>, <6,2,7,3>
+ 2792199020U, // <4,6,7,3>: Cost 3 vsldoi12 <6,7,3,4>, <6,7,3,4>
+ 2785858422U, // <4,6,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,4,5>
+ 3856651132U, // <4,6,7,5>: Cost 4 vsldoi12 <5,2,3,4>, <6,7,5,2>
+ 3317240632U, // <4,6,7,6>: Cost 4 vmrghw <4,7,5,0>, <6,6,6,6>
+ 2303954230U, // <4,6,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303954231U, // <4,6,7,u>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2244292897U, // <4,6,u,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2244293031U, // <4,6,u,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1170551290U, // <4,6,u,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2244293170U, // <4,6,u,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2244293261U, // <4,6,u,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2244293355U, // <4,6,u,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1170551608U, // <4,6,u,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222257974U, // <4,6,u,7>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 1222257975U, // <4,6,u,u>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 2238862330U, // <4,7,0,0>: Cost 3 vmrghw <4,0,5,1>, <7,0,1,2>
+ 2706604134U, // <4,7,0,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3312604308U, // <4,7,0,2>: Cost 4 vmrghw <4,0,5,1>, <7,2,0,3>
+ 3768402176U, // <4,7,0,3>: Cost 4 vsldoi8 <1,6,4,7>, <0,3,1,4>
+ 2238862648U, // <4,7,0,4>: Cost 3 vmrghw <4,0,5,1>, <7,4,0,5>
+ 3859600418U, // <4,7,0,5>: Cost 4 vsldoi12 <5,6,7,4>, <7,0,5,6>
+ 3729994393U, // <4,7,0,6>: Cost 4 vsldoi4 <6,4,7,0>, <6,4,7,0>
+ 2238862956U, // <4,7,0,7>: Cost 3 vmrghw <4,0,5,1>, <7,7,7,7>
+ 2706604701U, // <4,7,0,u>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3385610338U, // <4,7,1,0>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,0>
+ 3780346676U, // <4,7,1,1>: Cost 4 vsldoi8 <3,6,4,7>, <1,1,1,1>
+ 2706604954U, // <4,7,1,2>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3385610746U, // <4,7,1,3>: Cost 4 vmrglw <5,0,4,1>, <6,2,7,3>
+ 3385610342U, // <4,7,1,4>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,4>
+ 3385610667U, // <4,7,1,5>: Cost 4 vmrglw <5,0,4,1>, <6,1,7,5>
+ 3768403178U, // <4,7,1,6>: Cost 4 vsldoi8 <1,6,4,7>, <1,6,4,7>
+ 3385611074U, // <4,7,1,7>: Cost 4 vmrglw <5,0,4,1>, <6,6,7,7>
+ 2706604954U, // <4,7,1,u>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3859600532U, // <4,7,2,0>: Cost 4 vsldoi12 <5,6,7,4>, <7,2,0,3>
+ 3712091034U, // <4,7,2,1>: Cost 5 vsldoi4 <3,4,7,2>, <1,2,3,4>
+ 3774375528U, // <4,7,2,2>: Cost 4 vsldoi8 <2,6,4,7>, <2,2,2,2>
+ 2794853552U, // <4,7,2,3>: Cost 3 vsldoi12 <7,2,3,4>, <7,2,3,4>
+ 2785858744U, // <4,7,2,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,2,4,3>
+ 3735982182U, // <4,7,2,5>: Cost 4 vsldoi4 <7,4,7,2>, <5,6,7,4>
+ 3774375875U, // <4,7,2,6>: Cost 4 vsldoi8 <2,6,4,7>, <2,6,4,7>
+ 3735983476U, // <4,7,2,7>: Cost 4 vsldoi4 <7,4,7,2>, <7,4,7,2>
+ 2795222237U, // <4,7,2,u>: Cost 3 vsldoi12 <7,2,u,4>, <7,2,u,4>
+ 3780348054U, // <4,7,3,0>: Cost 4 vsldoi8 <3,6,4,7>, <3,0,1,2>
+ 3730015130U, // <4,7,3,1>: Cost 4 vsldoi4 <6,4,7,3>, <1,2,3,4>
+ 3780348244U, // <4,7,3,2>: Cost 4 vsldoi8 <3,6,4,7>, <3,2,4,3>
+ 3778357673U, // <4,7,3,3>: Cost 4 vsldoi8 <3,3,4,7>, <3,3,4,7>
+ 2325155942U, // <4,7,3,4>: Cost 3 vmrglw <7,2,4,3>, <5,6,7,4>
+ 3779684939U, // <4,7,3,5>: Cost 5 vsldoi8 <3,5,4,7>, <3,5,4,7>
+ 2706606748U, // <4,7,3,6>: Cost 3 vsldoi8 <3,6,4,7>, <3,6,4,7>
+ 3398898498U, // <4,7,3,7>: Cost 4 vmrglw <7,2,4,3>, <6,6,7,7>
+ 2707934014U, // <4,7,3,u>: Cost 3 vsldoi8 <3,u,4,7>, <3,u,4,7>
+ 2785858868U, // <4,7,4,0>: Cost 3 vsldoi12 <5,6,7,4>, <7,4,0,1>
+ 3780348874U, // <4,7,4,1>: Cost 4 vsldoi8 <3,6,4,7>, <4,1,2,3>
+ 3780349000U, // <4,7,4,2>: Cost 4 vsldoi8 <3,6,4,7>, <4,2,7,3>
+ 2308575738U, // <4,7,4,3>: Cost 3 vmrglw <4,4,4,4>, <6,2,7,3>
+ 2656283856U, // <4,7,4,4>: Cost 3 vsldoi4 <6,4,7,4>, <4,4,4,4>
+ 2706607414U, // <4,7,4,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2656285341U, // <4,7,4,6>: Cost 3 vsldoi4 <6,4,7,4>, <6,4,7,4>
+ 2241468012U, // <4,7,4,7>: Cost 3 vmrghw <4,4,4,4>, <7,7,7,7>
+ 2706607657U, // <4,7,4,u>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 1168569338U, // <4,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2242311242U, // <4,7,5,1>: Cost 3 vmrghw RHS, <7,1,1,1>
+ 2242303178U, // <4,7,5,2>: Cost 3 vmrghw RHS, <7,2,6,3>
+ 2242311395U, // <4,7,5,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1168569702U, // <4,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2242311606U, // <4,7,5,5>: Cost 3 vmrghw RHS, <7,5,5,5>
+ 2242311662U, // <4,7,5,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1168569964U, // <4,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1168569986U, // <4,7,5,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 3316593658U, // <4,7,6,0>: Cost 4 vmrghw <4,6,5,2>, <7,0,1,2>
+ 3316593738U, // <4,7,6,1>: Cost 5 vmrghw <4,6,5,2>, <7,1,1,1>
+ 3316634800U, // <4,7,6,2>: Cost 4 vmrghw <4,6,5,7>, <7,2,3,4>
+ 3386978810U, // <4,7,6,3>: Cost 4 vmrglw <5,2,4,6>, <6,2,7,3>
+ 2785859072U, // <4,7,6,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,6,4,7>
+ 3736014950U, // <4,7,6,5>: Cost 4 vsldoi4 <7,4,7,6>, <5,6,7,4>
+ 3316594158U, // <4,7,6,6>: Cost 4 vmrghw <4,6,5,2>, <7,6,2,7>
+ 2797803032U, // <4,7,6,7>: Cost 3 vsldoi12 <7,6,7,4>, <7,6,7,4>
+ 2797876769U, // <4,7,6,u>: Cost 3 vsldoi12 <7,6,u,4>, <7,6,u,4>
+ 2243499002U, // <4,7,7,0>: Cost 3 vmrghw <4,7,5,0>, <7,0,1,2>
+ 3718103962U, // <4,7,7,1>: Cost 4 vsldoi4 <4,4,7,7>, <1,2,3,4>
+ 3317257418U, // <4,7,7,2>: Cost 4 vmrghw <4,7,5,2>, <7,2,6,3>
+ 3377695816U, // <4,7,7,3>: Cost 4 vmrglw <3,6,4,7>, <4,2,7,3>
+ 2243532134U, // <4,7,7,4>: Cost 3 vmrghw <4,7,5,4>, <7,4,5,6>
+ 3317282230U, // <4,7,7,5>: Cost 4 vmrghw <4,7,5,5>, <7,5,5,5>
+ 2730497536U, // <4,7,7,6>: Cost 3 vsldoi8 <7,6,4,7>, <7,6,4,7>
+ 2243556972U, // <4,7,7,7>: Cost 3 vmrghw <4,7,5,7>, <7,7,7,7>
+ 2243565186U, // <4,7,7,u>: Cost 3 vmrghw <4,7,5,u>, <7,u,1,2>
+ 1170551802U, // <4,7,u,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2706609966U, // <4,7,u,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 2244293797U, // <4,7,u,2>: Cost 3 vmrghw RHS, <7,2,2,2>
+ 2244293859U, // <4,7,u,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1170552166U, // <4,7,u,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2706610330U, // <4,7,u,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2244294126U, // <4,7,u,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1170552428U, // <4,7,u,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1170552450U, // <4,7,u,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 1165118354U, // <4,u,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1624907878U, // <4,u,0,1>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638407377U, // <4,u,0,2>: Cost 3 vsldoi4 <3,4,u,0>, <2,3,4,u>
+ 2295931036U, // <4,u,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 2687369584U, // <4,u,0,4>: Cost 3 vsldoi8 <0,4,4,u>, <0,4,4,u>
+ 1165121690U, // <4,u,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 2662298489U, // <4,u,0,6>: Cost 3 vsldoi4 <7,4,u,0>, <6,7,4,u>
+ 2295934280U, // <4,u,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 1624908445U, // <4,u,0,u>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638413926U, // <4,u,1,0>: Cost 3 vsldoi4 <3,4,u,1>, LHS
+ 2691351382U, // <4,u,1,1>: Cost 3 vsldoi8 <1,1,4,u>, <1,1,4,u>
+ 1685280558U, // <4,u,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2287313052U, // <4,u,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 2299257799U, // <4,u,1,4>: Cost 3 vmrglw <2,u,4,1>, <1,2,u,4>
+ 2694005914U, // <4,u,1,5>: Cost 3 vsldoi8 <1,5,4,u>, <1,5,4,u>
+ 2305231362U, // <4,u,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 2287316296U, // <4,u,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 1685280612U, // <4,u,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2638422118U, // <4,u,2,0>: Cost 3 vsldoi4 <3,4,u,2>, LHS
+ 2240206638U, // <4,u,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 2697987712U, // <4,u,2,2>: Cost 3 vsldoi8 <2,2,4,u>, <2,2,4,u>
+ 1624909521U, // <4,u,2,3>: Cost 2 vsldoi8 <2,3,4,u>, <2,3,4,u>
+ 2759391121U, // <4,u,2,4>: Cost 3 vsldoi12 <1,2,u,4>, <u,2,4,3>
+ 2240207002U, // <4,u,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2698651578U, // <4,u,2,6>: Cost 3 vsldoi8 <2,3,4,u>, <2,6,3,7>
+ 2785859500U, // <4,u,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <u,2,7,3>
+ 1628227686U, // <4,u,2,u>: Cost 2 vsldoi8 <2,u,4,u>, <2,u,4,u>
+ 2759022524U, // <4,u,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,0,1>
+ 2801342408U, // <4,u,3,1>: Cost 3 vsldoi12 <u,3,1,4>, <u,3,1,4>
+ 2703960409U, // <4,u,3,2>: Cost 3 vsldoi8 <3,2,4,u>, <3,2,4,u>
+ 2759022554U, // <4,u,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,3,4>
+ 2759022564U, // <4,u,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,4,5>
+ 2240845978U, // <4,u,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 2706614941U, // <4,u,3,6>: Cost 3 vsldoi8 <3,6,4,u>, <3,6,4,u>
+ 2301267272U, // <4,u,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2759022596U, // <4,u,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,u,1>
+ 1570668646U, // <4,u,4,0>: Cost 2 vsldoi4 <4,4,u,4>, LHS
+ 1167726382U, // <4,u,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 2698652753U, // <4,u,4,2>: Cost 3 vsldoi8 <2,3,4,u>, <4,2,u,3>
+ 1234829468U, // <4,u,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 229035318U, // <4,u,4,4>: Cost 1 vspltisw0 RHS
+ 1624911158U, // <4,u,4,5>: Cost 2 vsldoi8 <2,3,4,u>, RHS
+ 2698653081U, // <4,u,4,6>: Cost 3 vsldoi8 <2,3,4,u>, <4,6,u,7>
+ 1234832712U, // <4,u,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 229035318U, // <4,u,4,u>: Cost 1 vspltisw0 RHS
+ 1168561875U, // <4,u,5,0>: Cost 2 vmrghw RHS, <u,0,1,2>
+ 94820142U, // <4,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1168562053U, // <4,u,5,2>: Cost 2 vmrghw RHS, <u,2,3,0>
+ 1222230172U, // <4,u,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 1168562239U, // <4,u,5,4>: Cost 2 vmrghw RHS, <u,4,5,6>
+ 94820506U, // <4,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685280922U, // <4,u,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 1222233416U, // <4,u,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 94820709U, // <4,u,5,u>: Cost 1 vmrghw RHS, LHS
+ 1564713062U, // <4,u,6,0>: Cost 2 vsldoi4 <3,4,u,6>, LHS
+ 2626511979U, // <4,u,6,1>: Cost 3 vsldoi4 <1,4,u,6>, <1,4,u,6>
+ 2632484676U, // <4,u,6,2>: Cost 3 vsldoi4 <2,4,u,6>, <2,4,u,6>
+ 1564715549U, // <4,u,6,3>: Cost 2 vsldoi4 <3,4,u,6>, <3,4,u,6>
+ 1564716342U, // <4,u,6,4>: Cost 2 vsldoi4 <3,4,u,6>, RHS
+ 2242853018U, // <4,u,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2656375464U, // <4,u,6,6>: Cost 3 vsldoi4 <6,4,u,6>, <6,4,u,6>
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2785859840U, // <4,u,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,0,1>
+ 2243499822U, // <4,u,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 2727851197U, // <4,u,7,2>: Cost 3 vsldoi8 <7,2,4,u>, <7,2,4,u>
+ 2303951004U, // <4,u,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2785859880U, // <4,u,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,4,5>
+ 2243500186U, // <4,u,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 2730505729U, // <4,u,7,6>: Cost 3 vsldoi8 <7,6,4,u>, <7,6,4,u>
+ 2303954248U, // <4,u,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303951009U, // <4,u,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 1564729446U, // <4,u,u,0>: Cost 2 vsldoi4 <3,4,u,u>, LHS
+ 96810798U, // <4,u,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685281125U, // <4,u,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 1222254748U, // <4,u,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 229035318U, // <4,u,u,4>: Cost 1 vspltisw0 RHS
+ 96811162U, // <4,u,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685281165U, // <4,u,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2754232320U, // <5,0,0,0>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,0,0>
+ 2754232330U, // <5,0,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,1,1>
+ 3718194894U, // <5,0,0,2>: Cost 4 vsldoi4 <4,5,0,0>, <2,3,4,5>
+ 3376385762U, // <5,0,0,3>: Cost 4 vmrglw <3,4,5,0>, <5,2,0,3>
+ 2754232357U, // <5,0,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,4,1>
+ 3845816370U, // <5,0,0,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,0,5,5>
+ 3782353389U, // <5,0,0,6>: Cost 4 vsldoi8 <4,0,5,0>, <0,6,0,7>
+ 3376386090U, // <5,0,0,7>: Cost 4 vmrglw <3,4,5,0>, <5,6,0,7>
+ 2757402697U, // <5,0,0,u>: Cost 3 vsldoi12 <0,u,u,5>, <0,0,u,1>
+ 2626543718U, // <5,0,1,0>: Cost 3 vsldoi4 <1,5,0,1>, LHS
+ 2626544751U, // <5,0,1,1>: Cost 3 vsldoi4 <1,5,0,1>, <1,5,0,1>
+ 1680490598U, // <5,0,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3766428665U, // <5,0,1,3>: Cost 4 vsldoi8 <1,3,5,0>, <1,3,5,0>
+ 2626546998U, // <5,0,1,4>: Cost 3 vsldoi4 <1,5,0,1>, RHS
+ 2650435539U, // <5,0,1,5>: Cost 3 vsldoi4 <5,5,0,1>, <5,5,0,1>
+ 3783017715U, // <5,0,1,6>: Cost 4 vsldoi8 <4,1,5,0>, <1,6,5,7>
+ 3385019000U, // <5,0,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,0,7>
+ 1680490652U, // <5,0,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3376398336U, // <5,0,2,0>: Cost 4 vmrglw <3,4,5,2>, <0,0,0,0>
+ 2245877862U, // <5,0,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 3773064808U, // <5,0,2,2>: Cost 4 vsldoi8 <2,4,5,0>, <2,2,2,2>
+ 2705295054U, // <5,0,2,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 3827974343U, // <5,0,2,4>: Cost 4 vsldoi12 <0,4,1,5>, <0,2,4,1>
+ 3845816530U, // <5,0,2,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,2,5,3>
+ 3779037114U, // <5,0,2,6>: Cost 4 vsldoi8 <3,4,5,0>, <2,6,3,7>
+ 3810887658U, // <5,0,2,7>: Cost 4 vsldoi8 <u,7,5,0>, <2,7,0,1>
+ 2245878429U, // <5,0,2,u>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2710603926U, // <5,0,3,0>: Cost 3 vsldoi8 <4,3,5,0>, <3,0,1,2>
+ 3827974396U, // <5,0,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <0,3,1,0>
+ 3779037516U, // <5,0,3,2>: Cost 4 vsldoi8 <3,4,5,0>, <3,2,3,4>
+ 3779037596U, // <5,0,3,3>: Cost 4 vsldoi8 <3,4,5,0>, <3,3,3,3>
+ 2705295868U, // <5,0,3,4>: Cost 3 vsldoi8 <3,4,5,0>, <3,4,5,0>
+ 3379726804U, // <5,0,3,5>: Cost 4 vmrglw <4,0,5,3>, <3,4,0,5>
+ 3802925748U, // <5,0,3,6>: Cost 4 vsldoi8 <7,4,5,0>, <3,6,7,4>
+ 3363138168U, // <5,0,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,0,7>
+ 2707950400U, // <5,0,3,u>: Cost 3 vsldoi8 <3,u,5,0>, <3,u,5,0>
+ 2626568294U, // <5,0,4,0>: Cost 3 vsldoi4 <1,5,0,4>, LHS
+ 1680490834U, // <5,0,4,1>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 3828048219U, // <5,0,4,2>: Cost 4 vsldoi12 <0,4,2,5>, <0,4,2,5>
+ 2710604932U, // <5,0,4,3>: Cost 3 vsldoi8 <4,3,5,0>, <4,3,5,0>
+ 2754232685U, // <5,0,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,4,4,5>
+ 2705296694U, // <5,0,4,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779038590U, // <5,0,4,6>: Cost 4 vsldoi8 <3,4,5,0>, <4,6,5,7>
+ 2713259464U, // <5,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1680490834U, // <5,0,4,u>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 2311307264U, // <5,0,5,0>: Cost 3 vmrglw <4,u,5,5>, <0,0,0,0>
+ 1174437990U, // <5,0,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 3779038946U, // <5,0,5,2>: Cost 4 vsldoi8 <3,4,5,0>, <5,2,0,3>
+ 3845816752U, // <5,0,5,3>: Cost 4 vsldoi12 <3,4,0,5>, <0,5,3,0>
+ 2248180050U, // <5,0,5,4>: Cost 3 vmrghw <5,5,5,5>, <0,4,1,5>
+ 2248180194U, // <5,0,5,5>: Cost 3 vmrghw <5,5,5,5>, <0,5,u,5>
+ 3779039274U, // <5,0,5,6>: Cost 4 vsldoi8 <3,4,5,0>, <5,6,0,7>
+ 3385051768U, // <5,0,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,0,7>
+ 1174438557U, // <5,0,5,u>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2302689280U, // <5,0,6,0>: Cost 3 vmrglw <3,4,5,6>, <0,0,0,0>
+ 1175208038U, // <5,0,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 3787002362U, // <5,0,6,2>: Cost 4 vsldoi8 <4,7,5,0>, <6,2,7,3>
+ 3376432160U, // <5,0,6,3>: Cost 4 vmrglw <3,4,5,6>, <1,4,0,3>
+ 2248950098U, // <5,0,6,4>: Cost 3 vmrghw <5,6,7,0>, <0,4,1,5>
+ 2248950180U, // <5,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 3376433702U, // <5,0,6,6>: Cost 4 vmrglw <3,4,5,6>, <3,5,0,6>
+ 2729186166U, // <5,0,6,7>: Cost 3 vsldoi8 <7,4,5,0>, <6,7,4,5>
+ 1175208605U, // <5,0,6,u>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2713261050U, // <5,0,7,0>: Cost 3 vsldoi8 <4,7,5,0>, <7,0,1,2>
+ 3365823599U, // <5,0,7,1>: Cost 4 vmrglw <1,6,5,7>, <1,5,0,1>
+ 3808900317U, // <5,0,7,2>: Cost 4 vsldoi8 <u,4,5,0>, <7,2,u,4>
+ 3784348899U, // <5,0,7,3>: Cost 4 vsldoi8 <4,3,5,0>, <7,3,0,1>
+ 2729186656U, // <5,0,7,4>: Cost 3 vsldoi8 <7,4,5,0>, <7,4,5,0>
+ 3787003268U, // <5,0,7,5>: Cost 4 vsldoi8 <4,7,5,0>, <7,5,0,0>
+ 3802928664U, // <5,0,7,6>: Cost 4 vsldoi8 <7,4,5,0>, <7,6,7,4>
+ 3787003431U, // <5,0,7,7>: Cost 4 vsldoi8 <4,7,5,0>, <7,7,0,1>
+ 2731841188U, // <5,0,7,u>: Cost 3 vsldoi8 <7,u,5,0>, <7,u,5,0>
+ 2626601062U, // <5,0,u,0>: Cost 3 vsldoi4 <1,5,0,u>, LHS
+ 1683145366U, // <5,0,u,1>: Cost 2 vsldoi12 <0,u,1,5>, <0,u,1,5>
+ 1680491165U, // <5,0,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2705295054U, // <5,0,u,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 2754233005U, // <5,0,u,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,u,4,1>
+ 2705299610U, // <5,0,u,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779041488U, // <5,0,u,6>: Cost 4 vsldoi8 <3,4,5,0>, <u,6,3,7>
+ 2737150252U, // <5,0,u,7>: Cost 3 vsldoi8 <u,7,5,0>, <u,7,5,0>
+ 1680491219U, // <5,0,u,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2713927680U, // <5,1,0,0>: Cost 3 vsldoi8 <4,u,5,1>, <0,0,0,0>
+ 1640185958U, // <5,1,0,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2310607866U, // <5,1,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 3787669756U, // <5,1,0,3>: Cost 4 vsldoi8 <4,u,5,1>, <0,3,1,0>
+ 2713928018U, // <5,1,0,4>: Cost 3 vsldoi8 <4,u,5,1>, <0,4,1,5>
+ 2306621778U, // <5,1,0,5>: Cost 3 vmrglw <4,1,5,0>, <0,4,1,5>
+ 3787670006U, // <5,1,0,6>: Cost 4 vsldoi8 <4,u,5,1>, <0,6,1,7>
+ 3736188301U, // <5,1,0,7>: Cost 4 vsldoi4 <7,5,1,0>, <7,5,1,0>
+ 1640186525U, // <5,1,0,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2650505318U, // <5,1,1,0>: Cost 3 vsldoi4 <5,5,1,1>, LHS
+ 2754233140U, // <5,1,1,1>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,1,1>
+ 2311276694U, // <5,1,1,2>: Cost 3 vmrglw <4,u,5,1>, <3,0,1,2>
+ 2311278315U, // <5,1,1,3>: Cost 3 vmrglw <4,u,5,1>, <5,2,1,3>
+ 2758435667U, // <5,1,1,4>: Cost 3 vsldoi12 <1,1,4,5>, <1,1,4,5>
+ 2754233180U, // <5,1,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,5,5>
+ 3385016497U, // <5,1,1,6>: Cost 4 vmrglw <4,u,5,1>, <0,2,1,6>
+ 2311278643U, // <5,1,1,7>: Cost 3 vmrglw <4,u,5,1>, <5,6,1,7>
+ 2758730615U, // <5,1,1,u>: Cost 3 vsldoi12 <1,1,u,5>, <1,1,u,5>
+ 3700367462U, // <5,1,2,0>: Cost 4 vsldoi4 <1,5,1,2>, LHS
+ 3830629255U, // <5,1,2,1>: Cost 4 vsldoi12 <0,u,1,5>, <1,2,1,3>
+ 2713929320U, // <5,1,2,2>: Cost 3 vsldoi8 <4,u,5,1>, <2,2,2,2>
+ 2754233238U, // <5,1,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,3,0>
+ 2759099300U, // <5,1,2,4>: Cost 3 vsldoi12 <1,2,4,5>, <1,2,4,5>
+ 2754233259U, // <5,1,2,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,5,3>
+ 2713929658U, // <5,1,2,6>: Cost 3 vsldoi8 <4,u,5,1>, <2,6,3,7>
+ 3872359354U, // <5,1,2,7>: Cost 4 vsldoi12 <7,u,0,5>, <1,2,7,0>
+ 2754233283U, // <5,1,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,u,0>
+ 2713929878U, // <5,1,3,0>: Cost 3 vsldoi8 <4,u,5,1>, <3,0,1,2>
+ 3363135498U, // <5,1,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,1,1>
+ 3363137686U, // <5,1,3,2>: Cost 4 vmrglw <1,2,5,3>, <3,0,1,2>
+ 2713930140U, // <5,1,3,3>: Cost 3 vsldoi8 <4,u,5,1>, <3,3,3,3>
+ 2713930242U, // <5,1,3,4>: Cost 3 vsldoi8 <4,u,5,1>, <3,4,5,6>
+ 2289394002U, // <5,1,3,5>: Cost 3 vmrglw <1,2,5,3>, <0,4,1,5>
+ 3787672184U, // <5,1,3,6>: Cost 4 vsldoi8 <4,u,5,1>, <3,6,0,7>
+ 3787672259U, // <5,1,3,7>: Cost 4 vsldoi8 <4,u,5,1>, <3,7,0,1>
+ 2713930526U, // <5,1,3,u>: Cost 3 vsldoi8 <4,u,5,1>, <3,u,1,2>
+ 1634880402U, // <5,1,4,0>: Cost 2 vsldoi8 <4,0,5,1>, <4,0,5,1>
+ 2760205355U, // <5,1,4,1>: Cost 3 vsldoi12 <1,4,1,5>, <1,4,1,5>
+ 2760279092U, // <5,1,4,2>: Cost 3 vsldoi12 <1,4,2,5>, <1,4,2,5>
+ 3787672708U, // <5,1,4,3>: Cost 4 vsldoi8 <4,u,5,1>, <4,3,5,0>
+ 2713930960U, // <5,1,4,4>: Cost 3 vsldoi8 <4,u,5,1>, <4,4,4,4>
+ 1640189238U, // <5,1,4,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 3786345848U, // <5,1,4,6>: Cost 4 vsldoi8 <4,6,5,1>, <4,6,5,1>
+ 3787009481U, // <5,1,4,7>: Cost 4 vsldoi8 <4,7,5,1>, <4,7,5,1>
+ 1640189466U, // <5,1,4,u>: Cost 2 vsldoi8 <4,u,5,1>, <4,u,5,1>
+ 2754233455U, // <5,1,5,0>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,0,1>
+ 2713931407U, // <5,1,5,1>: Cost 3 vsldoi8 <4,u,5,1>, <5,1,0,1>
+ 2713931499U, // <5,1,5,2>: Cost 3 vsldoi8 <4,u,5,1>, <5,2,1,3>
+ 3827975305U, // <5,1,5,3>: Cost 4 vsldoi12 <0,4,1,5>, <1,5,3,0>
+ 2754233495U, // <5,1,5,4>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,4,5>
+ 2288746834U, // <5,1,5,5>: Cost 3 vmrglw <1,1,5,5>, <0,4,1,5>
+ 2713931827U, // <5,1,5,6>: Cost 3 vsldoi8 <4,u,5,1>, <5,6,1,7>
+ 3787673725U, // <5,1,5,7>: Cost 4 vsldoi8 <4,u,5,1>, <5,7,1,0>
+ 2754233527U, // <5,1,5,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,u,1>
+ 2668462182U, // <5,1,6,0>: Cost 3 vsldoi4 <u,5,1,6>, LHS
+ 2290746002U, // <5,1,6,1>: Cost 3 vmrglw <1,4,5,6>, <0,u,1,1>
+ 2302691478U, // <5,1,6,2>: Cost 3 vmrglw <3,4,5,6>, <3,0,1,2>
+ 3364488071U, // <5,1,6,3>: Cost 4 vmrglw <1,4,5,6>, <1,2,1,3>
+ 2302689536U, // <5,1,6,4>: Cost 3 vmrglw <3,4,5,6>, <0,3,1,4>
+ 2754233587U, // <5,1,6,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,6,5,7>
+ 2713932600U, // <5,1,6,6>: Cost 3 vsldoi8 <4,u,5,1>, <6,6,6,6>
+ 2713932622U, // <5,1,6,7>: Cost 3 vsldoi8 <4,u,5,1>, <6,7,0,1>
+ 2302689297U, // <5,1,6,u>: Cost 3 vmrglw <3,4,5,6>, <0,0,1,u>
+ 2713932794U, // <5,1,7,0>: Cost 3 vsldoi8 <4,u,5,1>, <7,0,1,2>
+ 3365822474U, // <5,1,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,1,1>
+ 3365824662U, // <5,1,7,2>: Cost 4 vmrglw <1,6,5,7>, <3,0,1,2>
+ 3787674851U, // <5,1,7,3>: Cost 4 vsldoi8 <4,u,5,1>, <7,3,0,1>
+ 2713933158U, // <5,1,7,4>: Cost 3 vsldoi8 <4,u,5,1>, <7,4,5,6>
+ 2292080978U, // <5,1,7,5>: Cost 3 vmrglw <1,6,5,7>, <0,4,1,5>
+ 3365823613U, // <5,1,7,6>: Cost 4 vmrglw <1,6,5,7>, <1,5,1,6>
+ 2713933420U, // <5,1,7,7>: Cost 3 vsldoi8 <4,u,5,1>, <7,7,7,7>
+ 2713933442U, // <5,1,7,u>: Cost 3 vsldoi8 <4,u,5,1>, <7,u,1,2>
+ 1658771190U, // <5,1,u,0>: Cost 2 vsldoi8 <u,0,5,1>, <u,0,5,1>
+ 1640191790U, // <5,1,u,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2762933624U, // <5,1,u,2>: Cost 3 vsldoi12 <1,u,2,5>, <1,u,2,5>
+ 2754233724U, // <5,1,u,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,u,3,0>
+ 2763081098U, // <5,1,u,4>: Cost 3 vsldoi12 <1,u,4,5>, <1,u,4,5>
+ 1640192154U, // <5,1,u,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 2713934032U, // <5,1,u,6>: Cost 3 vsldoi8 <4,u,5,1>, <u,6,3,7>
+ 2713934080U, // <5,1,u,7>: Cost 3 vsldoi8 <4,u,5,1>, <u,7,0,1>
+ 1640192357U, // <5,1,u,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 3779051520U, // <5,2,0,0>: Cost 4 vsldoi8 <3,4,5,2>, <0,0,0,0>
+ 2705309798U, // <5,2,0,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 3838813637U, // <5,2,0,2>: Cost 4 vsldoi12 <2,2,4,5>, <2,0,2,1>
+ 2302640230U, // <5,2,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3765117266U, // <5,2,0,4>: Cost 4 vsldoi8 <1,1,5,2>, <0,4,1,5>
+ 3381027892U, // <5,2,0,5>: Cost 4 vmrglw <4,2,5,0>, <1,4,2,5>
+ 3842794985U, // <5,2,0,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,0,6,1>
+ 3408232554U, // <5,2,0,7>: Cost 4 vmrglw <u,7,5,0>, <0,1,2,7>
+ 2302640235U, // <5,2,0,u>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3700432998U, // <5,2,1,0>: Cost 4 vsldoi4 <1,5,2,1>, LHS
+ 3765117785U, // <5,2,1,1>: Cost 4 vsldoi8 <1,1,5,2>, <1,1,5,2>
+ 2311276136U, // <5,2,1,2>: Cost 3 vmrglw <4,u,5,1>, <2,2,2,2>
+ 1237532774U, // <5,2,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700436278U, // <5,2,1,4>: Cost 4 vsldoi4 <1,5,2,1>, RHS
+ 3381036084U, // <5,2,1,5>: Cost 4 vmrglw <4,2,5,1>, <1,4,2,5>
+ 3385018045U, // <5,2,1,6>: Cost 4 vmrglw <4,u,5,1>, <2,3,2,6>
+ 3385017560U, // <5,2,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,2,7>
+ 1237532779U, // <5,2,1,u>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700441190U, // <5,2,2,0>: Cost 4 vsldoi4 <1,5,2,2>, LHS
+ 3700442242U, // <5,2,2,1>: Cost 4 vsldoi4 <1,5,2,2>, <1,5,2,2>
+ 2754233960U, // <5,2,2,2>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,2,2>
+ 2754233970U, // <5,2,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,3,3>
+ 2765071997U, // <5,2,2,4>: Cost 3 vsldoi12 <2,2,4,5>, <2,2,4,5>
+ 3834021508U, // <5,2,2,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,2,5,3>
+ 3842795152U, // <5,2,2,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,2,6,6>
+ 3376402492U, // <5,2,2,7>: Cost 4 vmrglw <3,4,5,2>, <5,6,2,7>
+ 2754234015U, // <5,2,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,u,3>
+ 2754234022U, // <5,2,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,3,0,1>
+ 3827975855U, // <5,2,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <2,3,1,1>
+ 2644625102U, // <5,2,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393766U, // <5,2,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1691993806U, // <5,2,3,4>: Cost 2 vsldoi12 <2,3,4,5>, <2,3,4,5>
+ 2785052375U, // <5,2,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <2,3,5,5>
+ 3854812897U, // <5,2,3,6>: Cost 4 vsldoi12 <4,u,5,5>, <2,3,6,6>
+ 3802942187U, // <5,2,3,7>: Cost 4 vsldoi8 <7,4,5,2>, <3,7,4,5>
+ 1692288754U, // <5,2,3,u>: Cost 2 vsldoi12 <2,3,u,5>, <2,3,u,5>
+ 3839846139U, // <5,2,4,0>: Cost 4 vsldoi12 <2,4,0,5>, <2,4,0,5>
+ 2709294052U, // <5,2,4,1>: Cost 3 vsldoi8 <4,1,5,2>, <4,1,5,2>
+ 2766251789U, // <5,2,4,2>: Cost 3 vsldoi12 <2,4,2,5>, <2,4,2,5>
+ 2765735702U, // <5,2,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,3,5>
+ 3840141087U, // <5,2,4,4>: Cost 4 vsldoi12 <2,4,4,5>, <2,4,4,5>
+ 2705313078U, // <5,2,4,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2712612217U, // <5,2,4,6>: Cost 3 vsldoi8 <4,6,5,2>, <4,6,5,2>
+ 3787017674U, // <5,2,4,7>: Cost 4 vsldoi8 <4,7,5,2>, <4,7,5,2>
+ 2765735747U, // <5,2,4,u>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,u,5>
+ 3834021704U, // <5,2,5,0>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,0,1>
+ 3834021714U, // <5,2,5,1>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,1,2>
+ 2311308904U, // <5,2,5,2>: Cost 3 vmrglw <4,u,5,5>, <2,2,2,2>
+ 1237565542U, // <5,2,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3834021744U, // <5,2,5,4>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,4,5>
+ 3369124916U, // <5,2,5,5>: Cost 4 vmrglw <2,2,5,5>, <1,4,2,5>
+ 2248181690U, // <5,2,5,6>: Cost 3 vmrghw <5,5,5,5>, <2,6,3,7>
+ 3786354825U, // <5,2,5,7>: Cost 4 vsldoi8 <4,6,5,2>, <5,7,2,3>
+ 1237565547U, // <5,2,5,u>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3700473958U, // <5,2,6,0>: Cost 4 vsldoi4 <1,5,2,6>, LHS
+ 3700475014U, // <5,2,6,1>: Cost 4 vsldoi4 <1,5,2,6>, <1,5,2,6>
+ 2296718952U, // <5,2,6,2>: Cost 3 vmrglw <2,4,5,6>, <2,2,2,2>
+ 1228947558U, // <5,2,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3700477238U, // <5,2,6,4>: Cost 4 vsldoi4 <1,5,2,6>, RHS
+ 3834021836U, // <5,2,6,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,6,5,7>
+ 2248951738U, // <5,2,6,6>: Cost 3 vmrghw <5,6,7,0>, <2,6,3,7>
+ 3370461105U, // <5,2,6,7>: Cost 4 vmrglw <2,4,5,6>, <2,6,2,7>
+ 1228947563U, // <5,2,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3786355706U, // <5,2,7,0>: Cost 4 vsldoi8 <4,6,5,2>, <7,0,1,2>
+ 3783038037U, // <5,2,7,1>: Cost 4 vsldoi8 <4,1,5,2>, <7,1,2,3>
+ 3365824104U, // <5,2,7,2>: Cost 4 vmrglw <1,6,5,7>, <2,2,2,2>
+ 2292080742U, // <5,2,7,3>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 3842131986U, // <5,2,7,4>: Cost 4 vsldoi12 <2,7,4,5>, <2,7,4,5>
+ 3371795508U, // <5,2,7,5>: Cost 4 vmrglw <2,6,5,7>, <1,4,2,5>
+ 3786356206U, // <5,2,7,6>: Cost 4 vsldoi8 <4,6,5,2>, <7,6,2,7>
+ 3786356332U, // <5,2,7,7>: Cost 4 vsldoi8 <4,6,5,2>, <7,7,7,7>
+ 2292080747U, // <5,2,7,u>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 2754234427U, // <5,2,u,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,u,0,1>
+ 2705315630U, // <5,2,u,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 2296735336U, // <5,2,u,2>: Cost 3 vmrglw <2,4,5,u>, <2,2,2,2>
+ 1228963942U, // <5,2,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 1695311971U, // <5,2,u,4>: Cost 2 vsldoi12 <2,u,4,5>, <2,u,4,5>
+ 2705315994U, // <5,2,u,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2769201269U, // <5,2,u,6>: Cost 3 vsldoi12 <2,u,6,5>, <2,u,6,5>
+ 3370477489U, // <5,2,u,7>: Cost 4 vmrglw <2,4,5,u>, <2,6,2,7>
+ 1695606919U, // <5,2,u,u>: Cost 2 vsldoi12 <2,u,u,5>, <2,u,u,5>
+ 3827976331U, // <5,3,0,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,0,0,0>
+ 2754234518U, // <5,3,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,1,2>
+ 3706472290U, // <5,3,0,2>: Cost 4 vsldoi4 <2,5,3,0>, <2,5,3,0>
+ 3700500630U, // <5,3,0,3>: Cost 4 vsldoi4 <1,5,3,0>, <3,0,1,2>
+ 2754234544U, // <5,3,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,4,1>
+ 3376383766U, // <5,3,0,5>: Cost 4 vmrglw <3,4,5,0>, <2,4,3,5>
+ 3769770513U, // <5,3,0,6>: Cost 5 vsldoi8 <1,u,5,3>, <0,6,4,7>
+ 3376383930U, // <5,3,0,7>: Cost 4 vmrglw <3,4,5,0>, <2,6,3,7>
+ 2754234581U, // <5,3,0,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,u,2>
+ 2311275414U, // <5,3,1,0>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,0>
+ 2305967971U, // <5,3,1,1>: Cost 3 vmrglw <4,0,5,1>, <2,5,3,1>
+ 2692047787U, // <5,3,1,2>: Cost 3 vsldoi8 <1,2,5,3>, <1,2,5,3>
+ 2311276146U, // <5,3,1,3>: Cost 3 vmrglw <4,u,5,1>, <2,2,3,3>
+ 2311275418U, // <5,3,1,4>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,4>
+ 3765789807U, // <5,3,1,5>: Cost 4 vsldoi8 <1,2,5,3>, <1,5,0,1>
+ 3765789939U, // <5,3,1,6>: Cost 4 vsldoi8 <1,2,5,3>, <1,6,5,7>
+ 2311276474U, // <5,3,1,7>: Cost 3 vmrglw <4,u,5,1>, <2,6,3,7>
+ 2696029585U, // <5,3,1,u>: Cost 3 vsldoi8 <1,u,5,3>, <1,u,5,3>
+ 2311288709U, // <5,3,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 3765790243U, // <5,3,2,1>: Cost 4 vsldoi8 <1,2,5,3>, <2,1,3,5>
+ 3827976513U, // <5,3,2,2>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,2,2>
+ 2765736268U, // <5,3,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <3,2,3,4>
+ 2246248962U, // <5,3,2,4>: Cost 3 vmrghw <5,2,6,3>, <3,4,5,6>
+ 3765790563U, // <5,3,2,5>: Cost 4 vsldoi8 <1,2,5,3>, <2,5,3,1>
+ 3827976550U, // <5,3,2,6>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,6,3>
+ 3842795887U, // <5,3,2,7>: Cost 4 vsldoi12 <2,u,4,5>, <3,2,7,3>
+ 2769054073U, // <5,3,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <3,2,u,4>
+ 3827976575U, // <5,3,3,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,3,0,1>
+ 3765790963U, // <5,3,3,1>: Cost 4 vsldoi8 <1,2,5,3>, <3,1,2,5>
+ 3839478162U, // <5,3,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <3,3,2,2>
+ 2754234780U, // <5,3,3,3>: Cost 3 vsldoi12 <0,4,1,5>, <3,3,3,3>
+ 2771708327U, // <5,3,3,4>: Cost 3 vsldoi12 <3,3,4,5>, <3,3,4,5>
+ 3363137059U, // <5,3,3,5>: Cost 4 vmrglw <1,2,5,3>, <2,1,3,5>
+ 3375081320U, // <5,3,3,6>: Cost 4 vmrglw <3,2,5,3>, <2,5,3,6>
+ 3363137466U, // <5,3,3,7>: Cost 4 vmrglw <1,2,5,3>, <2,6,3,7>
+ 2772003275U, // <5,3,3,u>: Cost 3 vsldoi12 <3,3,u,5>, <3,3,u,5>
+ 2772077012U, // <5,3,4,0>: Cost 3 vsldoi12 <3,4,0,5>, <3,4,0,5>
+ 3765791714U, // <5,3,4,1>: Cost 4 vsldoi8 <1,2,5,3>, <4,1,5,0>
+ 2709965878U, // <5,3,4,2>: Cost 3 vsldoi8 <4,2,5,3>, <4,2,5,3>
+ 2772298223U, // <5,3,4,3>: Cost 3 vsldoi12 <3,4,3,5>, <3,4,3,5>
+ 2772371960U, // <5,3,4,4>: Cost 3 vsldoi12 <3,4,4,5>, <3,4,4,5>
+ 2754234882U, // <5,3,4,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,5,6>
+ 3839478282U, // <5,3,4,6>: Cost 4 vsldoi12 <2,3,4,5>, <3,4,6,5>
+ 3376416698U, // <5,3,4,7>: Cost 4 vmrglw <3,4,5,4>, <2,6,3,7>
+ 2754234909U, // <5,3,4,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,u,6>
+ 2311308182U, // <5,3,5,0>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,0>
+ 3765792421U, // <5,3,5,1>: Cost 4 vsldoi8 <1,2,5,3>, <5,1,2,5>
+ 2715938575U, // <5,3,5,2>: Cost 3 vsldoi8 <5,2,5,3>, <5,2,5,3>
+ 2311308914U, // <5,3,5,3>: Cost 3 vmrglw <4,u,5,5>, <2,2,3,3>
+ 2311308186U, // <5,3,5,4>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,4>
+ 2248182354U, // <5,3,5,5>: Cost 3 vmrghw <5,5,5,5>, <3,5,5,5>
+ 3765792837U, // <5,3,5,6>: Cost 4 vsldoi8 <1,2,5,3>, <5,6,3,7>
+ 2311309242U, // <5,3,5,7>: Cost 3 vmrglw <4,u,5,5>, <2,6,3,7>
+ 2311308190U, // <5,3,5,u>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,u>
+ 2632777830U, // <5,3,6,0>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3706520372U, // <5,3,6,1>: Cost 4 vsldoi4 <2,5,3,6>, <1,1,1,1>
+ 2632779624U, // <5,3,6,2>: Cost 3 vsldoi4 <2,5,3,6>, <2,5,3,6>
+ 2632780290U, // <5,3,6,3>: Cost 3 vsldoi4 <2,5,3,6>, <3,4,5,6>
+ 2632781110U, // <5,3,6,4>: Cost 3 vsldoi4 <2,5,3,6>, RHS
+ 2248952413U, // <5,3,6,5>: Cost 3 vmrghw <5,6,7,0>, <3,5,6,7>
+ 2302691176U, // <5,3,6,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302691258U, // <5,3,6,7>: Cost 3 vmrglw <3,4,5,6>, <2,6,3,7>
+ 2632783662U, // <5,3,6,u>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3365823382U, // <5,3,7,0>: Cost 4 vmrglw <1,6,5,7>, <1,2,3,0>
+ 3706529011U, // <5,3,7,1>: Cost 4 vsldoi4 <2,5,3,7>, <1,6,5,7>
+ 3706529641U, // <5,3,7,2>: Cost 4 vsldoi4 <2,5,3,7>, <2,5,3,7>
+ 3365824114U, // <5,3,7,3>: Cost 4 vmrglw <1,6,5,7>, <2,2,3,3>
+ 2774362859U, // <5,3,7,4>: Cost 3 vsldoi12 <3,7,4,5>, <3,7,4,5>
+ 3365824035U, // <5,3,7,5>: Cost 4 vmrglw <1,6,5,7>, <2,1,3,5>
+ 3383740183U, // <5,3,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,4,3,6>
+ 3363833786U, // <5,3,7,7>: Cost 4 vmrglw <1,3,5,7>, <2,6,3,7>
+ 2774657807U, // <5,3,7,u>: Cost 3 vsldoi12 <3,7,u,5>, <3,7,u,5>
+ 2632794214U, // <5,3,u,0>: Cost 3 vsldoi4 <2,5,3,u>, LHS
+ 2754235166U, // <5,3,u,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,1,2>
+ 2632796010U, // <5,3,u,2>: Cost 3 vsldoi4 <2,5,3,u>, <2,5,3,u>
+ 2632796676U, // <5,3,u,3>: Cost 3 vsldoi4 <2,5,3,u>, <3,4,5,u>
+ 2632797494U, // <5,3,u,4>: Cost 3 vsldoi4 <2,5,3,u>, RHS
+ 2754235206U, // <5,3,u,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,5,6>
+ 2302691176U, // <5,3,u,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302707642U, // <5,3,u,7>: Cost 3 vmrglw <3,4,5,u>, <2,6,3,7>
+ 2754235229U, // <5,3,u,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,u,2>
+ 3765133325U, // <5,4,0,0>: Cost 4 vsldoi8 <1,1,5,4>, <0,0,1,4>
+ 2705326182U, // <5,4,0,1>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 3718489806U, // <5,4,0,2>: Cost 4 vsldoi4 <4,5,4,0>, <2,3,4,5>
+ 3718490624U, // <5,4,0,3>: Cost 4 vsldoi4 <4,5,4,0>, <3,4,5,4>
+ 2709307730U, // <5,4,0,4>: Cost 3 vsldoi8 <4,1,5,4>, <0,4,1,5>
+ 2302641870U, // <5,4,0,5>: Cost 3 vmrglw <3,4,5,0>, <2,3,4,5>
+ 3376383695U, // <5,4,0,6>: Cost 5 vmrglw <3,4,5,0>, <2,3,4,6>
+ 3384351018U, // <5,4,0,7>: Cost 4 vmrglw <4,7,5,0>, <u,7,4,7>
+ 2705326749U, // <5,4,0,u>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 2305971057U, // <5,4,1,0>: Cost 3 vmrglw <4,0,5,1>, <6,7,4,0>
+ 3765134171U, // <5,4,1,1>: Cost 4 vsldoi8 <1,1,5,4>, <1,1,5,4>
+ 3766461338U, // <5,4,1,2>: Cost 4 vsldoi8 <1,3,5,4>, <1,2,3,4>
+ 3766461437U, // <5,4,1,3>: Cost 4 vsldoi8 <1,3,5,4>, <1,3,5,4>
+ 2311277776U, // <5,4,1,4>: Cost 3 vmrglw <4,u,5,1>, <4,4,4,4>
+ 2754235362U, // <5,4,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <4,1,5,0>
+ 3783050483U, // <5,4,1,6>: Cost 4 vsldoi8 <4,1,5,4>, <1,6,5,7>
+ 3385019036U, // <5,4,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,4,7>
+ 2311276241U, // <5,4,1,u>: Cost 3 vmrglw <4,u,5,1>, <2,3,4,u>
+ 3718504550U, // <5,4,2,0>: Cost 4 vsldoi4 <4,5,4,2>, LHS
+ 3783050787U, // <5,4,2,1>: Cost 4 vsldoi8 <4,1,5,4>, <2,1,3,5>
+ 3773097576U, // <5,4,2,2>: Cost 4 vsldoi8 <2,4,5,4>, <2,2,2,2>
+ 2705327822U, // <5,4,2,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 3773097767U, // <5,4,2,4>: Cost 4 vsldoi8 <2,4,5,4>, <2,4,5,4>
+ 2765737014U, // <5,4,2,5>: Cost 3 vsldoi12 <2,3,4,5>, <4,2,5,3>
+ 3779069882U, // <5,4,2,6>: Cost 4 vsldoi8 <3,4,5,4>, <2,6,3,7>
+ 3376401052U, // <5,4,2,7>: Cost 5 vmrglw <3,4,5,2>, <3,6,4,7>
+ 2245881370U, // <5,4,2,u>: Cost 3 vmrghw <5,2,1,3>, <4,u,5,1>
+ 3779070102U, // <5,4,3,0>: Cost 4 vsldoi8 <3,4,5,4>, <3,0,1,2>
+ 3363135525U, // <5,4,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,4,1>
+ 3779070284U, // <5,4,3,2>: Cost 4 vsldoi8 <3,4,5,4>, <3,2,3,4>
+ 3779070364U, // <5,4,3,3>: Cost 4 vsldoi8 <3,4,5,4>, <3,3,3,3>
+ 2705328640U, // <5,4,3,4>: Cost 3 vsldoi8 <3,4,5,4>, <3,4,5,4>
+ 2307311310U, // <5,4,3,5>: Cost 3 vmrglw <4,2,5,3>, <2,3,4,5>
+ 3866021012U, // <5,4,3,6>: Cost 4 vsldoi12 <6,7,4,5>, <4,3,6,7>
+ 3363138204U, // <5,4,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,4,7>
+ 2707983172U, // <5,4,3,u>: Cost 3 vsldoi8 <3,u,5,4>, <3,u,5,4>
+ 2708646805U, // <5,4,4,0>: Cost 3 vsldoi8 <4,0,5,4>, <4,0,5,4>
+ 2709310438U, // <5,4,4,1>: Cost 3 vsldoi8 <4,1,5,4>, <4,1,5,4>
+ 3779071030U, // <5,4,4,2>: Cost 4 vsldoi8 <3,4,5,4>, <4,2,5,3>
+ 2710637704U, // <5,4,4,3>: Cost 3 vsldoi8 <4,3,5,4>, <4,3,5,4>
+ 2754235600U, // <5,4,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <4,4,4,4>
+ 1704676570U, // <5,4,4,5>: Cost 2 vsldoi12 <4,4,5,5>, <4,4,5,5>
+ 3779071358U, // <5,4,4,6>: Cost 4 vsldoi8 <3,4,5,4>, <4,6,5,7>
+ 2713292236U, // <5,4,4,7>: Cost 3 vsldoi8 <4,7,5,4>, <4,7,5,4>
+ 1704897781U, // <5,4,4,u>: Cost 2 vsldoi12 <4,4,u,5>, <4,4,u,5>
+ 2626871398U, // <5,4,5,0>: Cost 3 vsldoi4 <1,5,4,5>, LHS
+ 2626872471U, // <5,4,5,1>: Cost 3 vsldoi4 <1,5,4,5>, <1,5,4,5>
+ 2765737230U, // <5,4,5,2>: Cost 3 vsldoi12 <2,3,4,5>, <4,5,2,3>
+ 3700615318U, // <5,4,5,3>: Cost 4 vsldoi4 <1,5,4,5>, <3,0,1,2>
+ 2626874678U, // <5,4,5,4>: Cost 3 vsldoi4 <1,5,4,5>, RHS
+ 1174441270U, // <5,4,5,5>: Cost 2 vmrghw <5,5,5,5>, RHS
+ 1680493878U, // <5,4,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 3385051804U, // <5,4,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,4,7>
+ 1680493896U, // <5,4,5,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2248952722U, // <5,4,6,0>: Cost 3 vmrghw <5,6,7,0>, <4,0,5,1>
+ 2302692152U, // <5,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 3382406107U, // <5,4,6,2>: Cost 4 vmrglw <4,4,5,6>, <4,1,4,2>
+ 3700623874U, // <5,4,6,3>: Cost 4 vsldoi4 <1,5,4,6>, <3,4,5,6>
+ 2248953040U, // <5,4,6,4>: Cost 3 vmrghw <5,6,7,0>, <4,4,4,4>
+ 1175211318U, // <5,4,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3376432280U, // <5,4,6,6>: Cost 4 vmrglw <3,4,5,6>, <1,5,4,6>
+ 2729218934U, // <5,4,6,7>: Cost 3 vsldoi8 <7,4,5,4>, <6,7,4,5>
+ 1175211561U, // <5,4,6,u>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3787035642U, // <5,4,7,0>: Cost 4 vsldoi8 <4,7,5,4>, <7,0,1,2>
+ 3365822501U, // <5,4,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,4,1>
+ 3808933085U, // <5,4,7,2>: Cost 4 vsldoi8 <u,4,5,4>, <7,2,u,4>
+ 3784381707U, // <5,4,7,3>: Cost 4 vsldoi8 <4,3,5,4>, <7,3,4,5>
+ 2713294182U, // <5,4,7,4>: Cost 3 vsldoi8 <4,7,5,4>, <7,4,5,6>
+ 2309998286U, // <5,4,7,5>: Cost 3 vmrglw <4,6,5,7>, <2,3,4,5>
+ 3383740111U, // <5,4,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,3,4,6>
+ 3787036239U, // <5,4,7,7>: Cost 4 vsldoi8 <4,7,5,4>, <7,7,4,5>
+ 2731873960U, // <5,4,7,u>: Cost 3 vsldoi8 <7,u,5,4>, <7,u,5,4>
+ 2626895974U, // <5,4,u,0>: Cost 3 vsldoi4 <1,5,4,u>, LHS
+ 2626897050U, // <5,4,u,1>: Cost 3 vsldoi4 <1,5,4,u>, <1,5,4,u>
+ 2644813518U, // <5,4,u,2>: Cost 3 vsldoi4 <4,5,4,u>, <2,3,4,5>
+ 2705327822U, // <5,4,u,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 2626899254U, // <5,4,u,4>: Cost 3 vsldoi4 <1,5,4,u>, RHS
+ 1707331102U, // <5,4,u,5>: Cost 2 vsldoi12 <4,u,5,5>, <4,u,5,5>
+ 1680494121U, // <5,4,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2737183024U, // <5,4,u,7>: Cost 3 vsldoi8 <u,7,5,4>, <u,7,5,4>
+ 1680494139U, // <5,4,u,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2302642684U, // <5,5,0,0>: Cost 3 vmrglw <3,4,5,0>, <3,4,5,0>
+ 1640218726U, // <5,5,0,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 3376384510U, // <5,5,0,2>: Cost 4 vmrglw <3,4,5,0>, <3,4,5,2>
+ 3376385078U, // <5,5,0,3>: Cost 4 vmrglw <3,4,5,0>, <4,2,5,3>
+ 2754236002U, // <5,5,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <5,0,4,1>
+ 2717942242U, // <5,5,0,5>: Cost 3 vsldoi8 <5,5,5,5>, <0,5,u,5>
+ 2244907106U, // <5,5,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 3376385406U, // <5,5,0,7>: Cost 4 vmrglw <3,4,5,0>, <4,6,5,7>
+ 1640219293U, // <5,5,0,u>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2305969365U, // <5,5,1,0>: Cost 3 vmrglw <4,0,5,1>, <4,4,5,0>
+ 1237536282U, // <5,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2713961366U, // <5,5,1,2>: Cost 3 vsldoi8 <4,u,5,5>, <1,2,3,0>
+ 3766469630U, // <5,5,1,3>: Cost 4 vsldoi8 <1,3,5,5>, <1,3,5,5>
+ 2782326455U, // <5,5,1,4>: Cost 3 vsldoi12 <5,1,4,5>, <5,1,4,5>
+ 2311277786U, // <5,5,1,5>: Cost 3 vmrglw <4,u,5,1>, <4,4,5,5>
+ 2311277058U, // <5,5,1,6>: Cost 3 vmrglw <4,u,5,1>, <3,4,5,6>
+ 3385017587U, // <5,5,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,5,7>
+ 1237536282U, // <5,5,1,u>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 3376400892U, // <5,5,2,0>: Cost 4 vmrglw <3,4,5,2>, <3,4,5,0>
+ 3827977963U, // <5,5,2,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,2,1,3>
+ 2302659070U, // <5,5,2,2>: Cost 3 vmrglw <3,4,5,2>, <3,4,5,2>
+ 2765737726U, // <5,5,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <5,2,3,4>
+ 3839479558U, // <5,5,2,4>: Cost 4 vsldoi12 <2,3,4,5>, <5,2,4,3>
+ 2781073167U, // <5,5,2,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,2,5,3>
+ 2713962426U, // <5,5,2,6>: Cost 3 vsldoi8 <4,u,5,5>, <2,6,3,7>
+ 3376401790U, // <5,5,2,7>: Cost 4 vmrglw <3,4,5,2>, <4,6,5,7>
+ 2769055531U, // <5,5,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <5,2,u,4>
+ 2713962646U, // <5,5,3,0>: Cost 3 vsldoi8 <4,u,5,5>, <3,0,1,2>
+ 3765143786U, // <5,5,3,1>: Cost 4 vsldoi8 <1,1,5,5>, <3,1,1,5>
+ 3839479621U, // <5,5,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,3,2,3>
+ 2289394603U, // <5,5,3,3>: Cost 3 vmrglw <1,2,5,3>, <1,2,5,3>
+ 2713963010U, // <5,5,3,4>: Cost 3 vsldoi8 <4,u,5,5>, <3,4,5,6>
+ 2313285150U, // <5,5,3,5>: Cost 3 vmrglw <5,2,5,3>, <4,u,5,5>
+ 3363138050U, // <5,5,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,5,6>
+ 3363136755U, // <5,5,3,7>: Cost 4 vmrglw <1,2,5,3>, <1,6,5,7>
+ 2713963294U, // <5,5,3,u>: Cost 3 vsldoi8 <4,u,5,5>, <3,u,1,2>
+ 2713963410U, // <5,5,4,0>: Cost 3 vsldoi8 <4,u,5,5>, <4,0,5,1>
+ 3827978127U, // <5,5,4,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,4,1,5>
+ 3839479704U, // <5,5,4,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,4,2,5>
+ 3376417846U, // <5,5,4,3>: Cost 4 vmrglw <3,4,5,4>, <4,2,5,3>
+ 1637567706U, // <5,5,4,4>: Cost 2 vsldoi8 <4,4,5,5>, <4,4,5,5>
+ 1640222006U, // <5,5,4,5>: Cost 2 vsldoi8 <4,u,5,5>, RHS
+ 2310640998U, // <5,5,4,6>: Cost 3 vmrglw <4,7,5,4>, <7,4,5,6>
+ 3376418174U, // <5,5,4,7>: Cost 4 vmrglw <3,4,5,4>, <4,6,5,7>
+ 1640222238U, // <5,5,4,u>: Cost 2 vsldoi8 <4,u,5,5>, <4,u,5,5>
+ 1577091174U, // <5,5,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 2311310226U, // <5,5,5,1>: Cost 3 vmrglw <4,u,5,5>, <4,0,5,1>
+ 2713964303U, // <5,5,5,2>: Cost 3 vsldoi8 <4,u,5,5>, <5,2,5,3>
+ 2311311119U, // <5,5,5,3>: Cost 3 vmrglw <4,u,5,5>, <5,2,5,3>
+ 1577094454U, // <5,5,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,5,5>: Cost 1 vspltisw1 RHS
+ 2311309826U, // <5,5,5,6>: Cost 3 vmrglw <4,u,5,5>, <3,4,5,6>
+ 2311311447U, // <5,5,5,7>: Cost 3 vmrglw <4,u,5,5>, <5,6,5,7>
+ 296144182U, // <5,5,5,u>: Cost 1 vspltisw1 RHS
+ 2248953460U, // <5,5,6,0>: Cost 3 vmrghw <5,6,7,0>, <5,0,6,1>
+ 2326580114U, // <5,5,6,1>: Cost 3 vmrglw <7,4,5,6>, <4,0,5,1>
+ 2713965050U, // <5,5,6,2>: Cost 3 vsldoi8 <4,u,5,5>, <6,2,7,3>
+ 3700697602U, // <5,5,6,3>: Cost 4 vsldoi4 <1,5,5,6>, <3,4,5,6>
+ 2785644620U, // <5,5,6,4>: Cost 3 vsldoi12 <5,6,4,5>, <5,6,4,5>
+ 2781073495U, // <5,5,6,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,6,5,7>
+ 1228950018U, // <5,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965390U, // <5,5,6,7>: Cost 3 vsldoi8 <4,u,5,5>, <6,7,0,1>
+ 1228950018U, // <5,5,6,u>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965562U, // <5,5,7,0>: Cost 3 vsldoi8 <4,u,5,5>, <7,0,1,2>
+ 3383741330U, // <5,5,7,1>: Cost 4 vmrglw <4,6,5,7>, <4,0,5,1>
+ 3718620878U, // <5,5,7,2>: Cost 4 vsldoi4 <4,5,5,7>, <2,3,4,5>
+ 3365823403U, // <5,5,7,3>: Cost 4 vmrglw <1,6,5,7>, <1,2,5,3>
+ 2713965926U, // <5,5,7,4>: Cost 3 vsldoi8 <4,u,5,5>, <7,4,5,6>
+ 2717947318U, // <5,5,7,5>: Cost 3 vsldoi8 <5,5,5,5>, <7,5,5,5>
+ 3365825026U, // <5,5,7,6>: Cost 4 vmrglw <1,6,5,7>, <3,4,5,6>
+ 2292081907U, // <5,5,7,7>: Cost 3 vmrglw <1,6,5,7>, <1,6,5,7>
+ 2713966210U, // <5,5,7,u>: Cost 3 vsldoi8 <4,u,5,5>, <7,u,1,2>
+ 1577091174U, // <5,5,u,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1640224558U, // <5,5,u,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2713966469U, // <5,5,u,2>: Cost 3 vsldoi8 <4,u,5,5>, <u,2,3,0>
+ 2713966524U, // <5,5,u,3>: Cost 3 vsldoi8 <4,u,5,5>, <u,3,0,1>
+ 1577094454U, // <5,5,u,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,u,5>: Cost 1 vspltisw1 RHS
+ 1228950018U, // <5,5,u,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713966848U, // <5,5,u,7>: Cost 3 vsldoi8 <4,u,5,5>, <u,7,0,1>
+ 296144182U, // <5,5,u,u>: Cost 1 vspltisw1 RHS
+ 2705342464U, // <5,6,0,0>: Cost 3 vsldoi8 <3,4,5,6>, <0,0,0,0>
+ 1631600742U, // <5,6,0,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3773112493U, // <5,6,0,2>: Cost 4 vsldoi8 <2,4,5,6>, <0,2,1,2>
+ 2705342720U, // <5,6,0,3>: Cost 3 vsldoi8 <3,4,5,6>, <0,3,1,4>
+ 2705342802U, // <5,6,0,4>: Cost 3 vsldoi8 <3,4,5,6>, <0,4,1,5>
+ 3779084708U, // <5,6,0,5>: Cost 4 vsldoi8 <3,4,5,6>, <0,5,1,6>
+ 3779084790U, // <5,6,0,6>: Cost 4 vsldoi8 <3,4,5,6>, <0,6,1,7>
+ 2302643510U, // <5,6,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631601309U, // <5,6,0,u>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3767141092U, // <5,6,1,0>: Cost 4 vsldoi8 <1,4,5,6>, <1,0,1,2>
+ 2705343284U, // <5,6,1,1>: Cost 3 vsldoi8 <3,4,5,6>, <1,1,1,1>
+ 2705343382U, // <5,6,1,2>: Cost 3 vsldoi8 <3,4,5,6>, <1,2,3,0>
+ 3779085282U, // <5,6,1,3>: Cost 4 vsldoi8 <3,4,5,6>, <1,3,2,4>
+ 2693399632U, // <5,6,1,4>: Cost 3 vsldoi8 <1,4,5,6>, <1,4,5,6>
+ 3767805089U, // <5,6,1,5>: Cost 4 vsldoi8 <1,5,5,6>, <1,5,5,6>
+ 2311279416U, // <5,6,1,6>: Cost 3 vmrglw <4,u,5,1>, <6,6,6,6>
+ 1237536054U, // <5,6,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1237536055U, // <5,6,1,u>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 3773113789U, // <5,6,2,0>: Cost 4 vsldoi8 <2,4,5,6>, <2,0,1,2>
+ 3779085855U, // <5,6,2,1>: Cost 4 vsldoi8 <3,4,5,6>, <2,1,3,1>
+ 2699372136U, // <5,6,2,2>: Cost 3 vsldoi8 <2,4,5,6>, <2,2,2,2>
+ 2705344166U, // <5,6,2,3>: Cost 3 vsldoi8 <3,4,5,6>, <2,3,0,1>
+ 2699372329U, // <5,6,2,4>: Cost 3 vsldoi8 <2,4,5,6>, <2,4,5,6>
+ 2705344360U, // <5,6,2,5>: Cost 3 vsldoi8 <3,4,5,6>, <2,5,3,6>
+ 2705344442U, // <5,6,2,6>: Cost 3 vsldoi8 <3,4,5,6>, <2,6,3,7>
+ 2302659894U, // <5,6,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2702026861U, // <5,6,2,u>: Cost 3 vsldoi8 <2,u,5,6>, <2,u,5,6>
+ 2705344662U, // <5,6,3,0>: Cost 3 vsldoi8 <3,4,5,6>, <3,0,1,2>
+ 3767142661U, // <5,6,3,1>: Cost 4 vsldoi8 <1,4,5,6>, <3,1,4,5>
+ 3773114689U, // <5,6,3,2>: Cost 4 vsldoi8 <2,4,5,6>, <3,2,2,2>
+ 2705344924U, // <5,6,3,3>: Cost 3 vsldoi8 <3,4,5,6>, <3,3,3,3>
+ 1631603202U, // <5,6,3,4>: Cost 2 vsldoi8 <3,4,5,6>, <3,4,5,6>
+ 3842945597U, // <5,6,3,5>: Cost 4 vsldoi12 <2,u,6,5>, <6,3,5,7>
+ 3779086962U, // <5,6,3,6>: Cost 4 vsldoi8 <3,4,5,6>, <3,6,0,1>
+ 2289397046U, // <5,6,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634257734U, // <5,6,3,u>: Cost 2 vsldoi8 <3,u,5,6>, <3,u,5,6>
+ 2644926566U, // <5,6,4,0>: Cost 3 vsldoi4 <4,5,6,4>, LHS
+ 3779087306U, // <5,6,4,1>: Cost 4 vsldoi8 <3,4,5,6>, <4,1,2,3>
+ 2790142577U, // <5,6,4,2>: Cost 3 vsldoi12 <6,4,2,5>, <6,4,2,5>
+ 2644929026U, // <5,6,4,3>: Cost 3 vsldoi4 <4,5,6,4>, <3,4,5,6>
+ 2711317723U, // <5,6,4,4>: Cost 3 vsldoi8 <4,4,5,6>, <4,4,5,6>
+ 1631604022U, // <5,6,4,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 2712644989U, // <5,6,4,6>: Cost 3 vsldoi8 <4,6,5,6>, <4,6,5,6>
+ 2302676278U, // <5,6,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631604265U, // <5,6,4,u>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 3842945708U, // <5,6,5,0>: Cost 4 vsldoi12 <2,u,6,5>, <6,5,0,1>
+ 3767144133U, // <5,6,5,1>: Cost 4 vsldoi8 <1,4,5,6>, <5,1,6,1>
+ 2705346328U, // <5,6,5,2>: Cost 3 vsldoi8 <3,4,5,6>, <5,2,6,3>
+ 3779088207U, // <5,6,5,3>: Cost 4 vsldoi8 <3,4,5,6>, <5,3,3,4>
+ 2717290420U, // <5,6,5,4>: Cost 3 vsldoi8 <5,4,5,6>, <5,4,5,6>
+ 2705346574U, // <5,6,5,5>: Cost 3 vsldoi8 <3,4,5,6>, <5,5,6,6>
+ 2705346596U, // <5,6,5,6>: Cost 3 vsldoi8 <3,4,5,6>, <5,6,0,1>
+ 1237568822U, // <5,6,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 1237568823U, // <5,6,5,u>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 2650914918U, // <5,6,6,0>: Cost 3 vsldoi4 <5,5,6,6>, LHS
+ 3364490949U, // <5,6,6,1>: Cost 4 vmrglw <1,4,5,6>, <5,1,6,1>
+ 2248954362U, // <5,6,6,2>: Cost 3 vmrghw <5,6,7,0>, <6,2,7,3>
+ 2302693144U, // <5,6,6,3>: Cost 3 vmrglw <3,4,5,6>, <5,2,6,3>
+ 2650918198U, // <5,6,6,4>: Cost 3 vsldoi4 <5,5,6,6>, RHS
+ 2650918926U, // <5,6,6,5>: Cost 3 vsldoi4 <5,5,6,6>, <5,5,6,6>
+ 2302693390U, // <5,6,6,6>: Cost 3 vmrglw <3,4,5,6>, <5,5,6,6>
+ 1228950838U, // <5,6,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228950839U, // <5,6,6,u>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 497467494U, // <5,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571210036U, // <5,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571210856U, // <5,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571211414U, // <5,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497470774U, // <5,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571213316U, // <5,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571213818U, // <5,6,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571214956U, // <5,6,7,7>: Cost 2 vsldoi4 RHS, <7,7,7,7>
+ 497473326U, // <5,6,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497475686U, // <5,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631606574U, // <5,6,u,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 1571219048U, // <5,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571219606U, // <5,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497478967U, // <5,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631606938U, // <5,6,u,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 1571222010U, // <5,6,u,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1228967222U, // <5,6,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497481518U, // <5,6,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 3768475648U, // <5,7,0,0>: Cost 4 vsldoi8 <1,6,5,7>, <0,0,0,0>
+ 2694733926U, // <5,7,0,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 3718711395U, // <5,7,0,2>: Cost 4 vsldoi4 <4,5,7,0>, <2,u,4,5>
+ 3384349178U, // <5,7,0,3>: Cost 4 vmrglw <4,7,5,0>, <6,2,7,3>
+ 2694734162U, // <5,7,0,4>: Cost 3 vsldoi8 <1,6,5,7>, <0,4,1,5>
+ 3384347884U, // <5,7,0,5>: Cost 4 vmrglw <4,7,5,0>, <4,4,7,5>
+ 3730658026U, // <5,7,0,6>: Cost 4 vsldoi4 <6,5,7,0>, <6,5,7,0>
+ 3718714362U, // <5,7,0,7>: Cost 4 vsldoi4 <4,5,7,0>, <7,0,1,2>
+ 2694734493U, // <5,7,0,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2311278690U, // <5,7,1,0>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,0>
+ 2305970923U, // <5,7,1,1>: Cost 3 vmrglw <4,0,5,1>, <6,5,7,1>
+ 3768476566U, // <5,7,1,2>: Cost 4 vsldoi8 <1,6,5,7>, <1,2,3,0>
+ 2311279098U, // <5,7,1,3>: Cost 3 vmrglw <4,u,5,1>, <6,2,7,3>
+ 2311278694U, // <5,7,1,4>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,4>
+ 3768476783U, // <5,7,1,5>: Cost 4 vsldoi8 <1,6,5,7>, <1,5,0,1>
+ 2694735091U, // <5,7,1,6>: Cost 3 vsldoi8 <1,6,5,7>, <1,6,5,7>
+ 2311279426U, // <5,7,1,7>: Cost 3 vmrglw <4,u,5,1>, <6,6,7,7>
+ 2696062357U, // <5,7,1,u>: Cost 3 vsldoi8 <1,u,5,7>, <1,u,5,7>
+ 3383701602U, // <5,7,2,0>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,0>
+ 3768477219U, // <5,7,2,1>: Cost 4 vsldoi8 <1,6,5,7>, <2,1,3,5>
+ 3768477288U, // <5,7,2,2>: Cost 4 vsldoi8 <1,6,5,7>, <2,2,2,2>
+ 2309960186U, // <5,7,2,3>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3383701606U, // <5,7,2,4>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,4>
+ 3768477545U, // <5,7,2,5>: Cost 4 vsldoi8 <1,6,5,7>, <2,5,3,7>
+ 3766486970U, // <5,7,2,6>: Cost 4 vsldoi8 <1,3,5,7>, <2,6,3,7>
+ 3383702338U, // <5,7,2,7>: Cost 4 vmrglw <4,6,5,2>, <6,6,7,7>
+ 2309960186U, // <5,7,2,u>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3768477846U, // <5,7,3,0>: Cost 4 vsldoi8 <1,6,5,7>, <3,0,1,2>
+ 3768477975U, // <5,7,3,1>: Cost 4 vsldoi8 <1,6,5,7>, <3,1,6,5>
+ 3786393932U, // <5,7,3,2>: Cost 4 vsldoi8 <4,6,5,7>, <3,2,3,4>
+ 3768478108U, // <5,7,3,3>: Cost 4 vsldoi8 <1,6,5,7>, <3,3,3,3>
+ 2795599115U, // <5,7,3,4>: Cost 3 vsldoi12 <7,3,4,5>, <7,3,4,5>
+ 3385037470U, // <5,7,3,5>: Cost 4 vmrglw <4,u,5,3>, <6,4,7,5>
+ 3780422309U, // <5,7,3,6>: Cost 4 vsldoi8 <3,6,5,7>, <3,6,5,7>
+ 3848107301U, // <5,7,3,7>: Cost 4 vsldoi12 <3,7,4,5>, <7,3,7,4>
+ 2795894063U, // <5,7,3,u>: Cost 3 vsldoi12 <7,3,u,5>, <7,3,u,5>
+ 2795967800U, // <5,7,4,0>: Cost 3 vsldoi12 <7,4,0,5>, <7,4,0,5>
+ 3768478690U, // <5,7,4,1>: Cost 4 vsldoi8 <1,6,5,7>, <4,1,5,0>
+ 3718744163U, // <5,7,4,2>: Cost 4 vsldoi4 <4,5,7,4>, <2,u,4,5>
+ 3784404107U, // <5,7,4,3>: Cost 4 vsldoi8 <4,3,5,7>, <4,3,5,7>
+ 2796262748U, // <5,7,4,4>: Cost 3 vsldoi12 <7,4,4,5>, <7,4,4,5>
+ 2694737206U, // <5,7,4,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2712653182U, // <5,7,4,6>: Cost 3 vsldoi8 <4,6,5,7>, <4,6,5,7>
+ 2713316815U, // <5,7,4,7>: Cost 3 vsldoi8 <4,7,5,7>, <4,7,5,7>
+ 2694737449U, // <5,7,4,u>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2311311458U, // <5,7,5,0>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,0>
+ 3768479433U, // <5,7,5,1>: Cost 4 vsldoi8 <1,6,5,7>, <5,1,6,5>
+ 3768479521U, // <5,7,5,2>: Cost 4 vsldoi8 <1,6,5,7>, <5,2,7,3>
+ 2311311866U, // <5,7,5,3>: Cost 3 vmrglw <4,u,5,5>, <6,2,7,3>
+ 2311311462U, // <5,7,5,4>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,4>
+ 2248185270U, // <5,7,5,5>: Cost 3 vmrghw <5,5,5,5>, <7,5,5,5>
+ 2718625879U, // <5,7,5,6>: Cost 3 vsldoi8 <5,6,5,7>, <5,6,5,7>
+ 2311312194U, // <5,7,5,7>: Cost 3 vmrglw <4,u,5,5>, <6,6,7,7>
+ 2311311466U, // <5,7,5,u>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,u>
+ 2248954874U, // <5,7,6,0>: Cost 3 vmrghw <5,6,7,0>, <7,0,1,2>
+ 3322696778U, // <5,7,6,1>: Cost 4 vmrghw <5,6,7,0>, <7,1,1,1>
+ 2248955028U, // <5,7,6,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2656963074U, // <5,7,6,3>: Cost 3 vsldoi4 <6,5,7,6>, <3,4,5,6>
+ 2248955238U, // <5,7,6,4>: Cost 3 vmrghw <5,6,7,0>, <7,4,5,6>
+ 2248955329U, // <5,7,6,5>: Cost 3 vmrghw <5,6,7,0>, <7,5,6,7>
+ 2656965360U, // <5,7,6,6>: Cost 3 vsldoi4 <6,5,7,6>, <6,5,7,6>
+ 2248955500U, // <5,7,6,7>: Cost 3 vmrghw <5,6,7,0>, <7,7,7,7>
+ 2248955522U, // <5,7,6,u>: Cost 3 vmrghw <5,6,7,0>, <7,u,1,2>
+ 3718766694U, // <5,7,7,0>: Cost 4 vsldoi4 <4,5,7,7>, LHS
+ 3724739827U, // <5,7,7,1>: Cost 4 vsldoi4 <5,5,7,7>, <1,6,5,7>
+ 3718768739U, // <5,7,7,2>: Cost 4 vsldoi4 <4,5,7,7>, <2,u,4,5>
+ 3365826337U, // <5,7,7,3>: Cost 4 vmrglw <1,6,5,7>, <5,2,7,3>
+ 2798253647U, // <5,7,7,4>: Cost 3 vsldoi12 <7,7,4,5>, <7,7,4,5>
+ 3365826258U, // <5,7,7,5>: Cost 4 vmrglw <1,6,5,7>, <5,1,7,5>
+ 3730715377U, // <5,7,7,6>: Cost 4 vsldoi4 <6,5,7,7>, <6,5,7,7>
+ 2310665836U, // <5,7,7,7>: Cost 3 vmrglw <4,7,5,7>, <7,7,7,7>
+ 2798548595U, // <5,7,7,u>: Cost 3 vsldoi12 <7,7,u,5>, <7,7,u,5>
+ 2311336034U, // <5,7,u,0>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,0>
+ 2694739758U, // <5,7,u,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2248955028U, // <5,7,u,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2311336442U, // <5,7,u,3>: Cost 3 vmrglw <4,u,5,u>, <6,2,7,3>
+ 2311336038U, // <5,7,u,4>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,4>
+ 2694740122U, // <5,7,u,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2656981746U, // <5,7,u,6>: Cost 3 vsldoi4 <6,5,7,u>, <6,5,7,u>
+ 2311336770U, // <5,7,u,7>: Cost 3 vmrglw <4,u,5,u>, <6,6,7,7>
+ 2694740325U, // <5,7,u,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2705358848U, // <5,u,0,0>: Cost 3 vsldoi8 <3,4,5,u>, <0,0,0,0>
+ 1631617126U, // <5,u,0,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2310607866U, // <5,u,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 2302640284U, // <5,u,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 2754238189U, // <5,u,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <u,0,4,1>
+ 2305296114U, // <5,u,0,5>: Cost 3 vmrglw <3,u,5,0>, <2,3,u,5>
+ 2244907106U, // <5,u,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 2302643528U, // <5,u,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631617693U, // <5,u,0,u>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2627133542U, // <5,u,1,0>: Cost 3 vsldoi4 <1,5,u,1>, LHS
+ 1237536282U, // <5,u,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 1680496430U, // <5,u,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1237532828U, // <5,u,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 2693416018U, // <5,u,1,4>: Cost 3 vsldoi8 <1,4,5,u>, <1,4,5,u>
+ 2756892486U, // <5,u,1,5>: Cost 3 vsldoi12 <0,u,1,5>, <u,1,5,0>
+ 2694743284U, // <5,u,1,6>: Cost 3 vsldoi8 <1,6,5,u>, <1,6,5,u>
+ 1237536072U, // <5,u,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1680496484U, // <5,u,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2311288709U, // <5,u,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 2245883694U, // <5,u,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2699388520U, // <5,u,2,2>: Cost 3 vsldoi8 <2,4,5,u>, <2,2,2,2>
+ 2754238344U, // <5,u,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,3,3>
+ 2699388715U, // <5,u,2,4>: Cost 3 vsldoi8 <2,4,5,u>, <2,4,5,u>
+ 2757408666U, // <5,u,2,5>: Cost 3 vsldoi12 <0,u,u,5>, <u,2,5,3>
+ 2705360826U, // <5,u,2,6>: Cost 3 vsldoi8 <3,4,5,u>, <2,6,3,7>
+ 2302659912U, // <5,u,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2754238389U, // <5,u,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,u,3>
+ 2754238396U, // <5,u,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <u,3,0,1>
+ 3827980229U, // <5,u,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <u,3,1,1>
+ 2644625102U, // <5,u,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393820U, // <5,u,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1631619588U, // <5,u,3,4>: Cost 2 vsldoi8 <3,4,5,u>, <3,4,5,u>
+ 2785056749U, // <5,u,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <u,3,5,5>
+ 3363138077U, // <5,u,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,u,6>
+ 2289397064U, // <5,u,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634274120U, // <5,u,3,u>: Cost 2 vsldoi8 <3,u,5,u>, <3,u,5,u>
+ 1634937753U, // <5,u,4,0>: Cost 2 vsldoi8 <4,0,5,u>, <4,0,5,u>
+ 1728272410U, // <5,u,4,1>: Cost 2 vsldoi12 <u,4,1,5>, <u,4,1,5>
+ 2710006843U, // <5,u,4,2>: Cost 3 vsldoi8 <4,2,5,u>, <4,2,5,u>
+ 2765740076U, // <5,u,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <u,4,3,5>
+ 1637592285U, // <5,u,4,4>: Cost 2 vsldoi8 <4,4,5,u>, <4,4,5,u>
+ 1631620406U, // <5,u,4,5>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 2712661375U, // <5,u,4,6>: Cost 3 vsldoi8 <4,6,5,u>, <4,6,5,u>
+ 2302676296U, // <5,u,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631620649U, // <5,u,4,u>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 1577091174U, // <5,u,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1174443822U, // <5,u,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2766035058U, // <5,u,5,2>: Cost 3 vsldoi12 <2,3,u,5>, <u,5,2,3>
+ 1237565596U, // <5,u,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 1577094454U, // <5,u,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,u,5,5>: Cost 1 vspltisw1 RHS
+ 1680496794U, // <5,u,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1237568840U, // <5,u,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 296144182U, // <5,u,5,u>: Cost 1 vspltisw1 RHS
+ 2633146470U, // <5,u,6,0>: Cost 3 vsldoi4 <2,5,u,6>, LHS
+ 1175213870U, // <5,u,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2633148309U, // <5,u,6,2>: Cost 3 vsldoi4 <2,5,u,6>, <2,5,u,6>
+ 1228947612U, // <5,u,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 2633149750U, // <5,u,6,4>: Cost 3 vsldoi4 <2,5,u,6>, RHS
+ 1175214234U, // <5,u,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 1228950018U, // <5,u,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 1228950856U, // <5,u,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228947617U, // <5,u,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 497614950U, // <5,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571357492U, // <5,u,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571358312U, // <5,u,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571358870U, // <5,u,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497618248U, // <5,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571360772U, // <5,u,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571361274U, // <5,u,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571361786U, // <5,u,7,7>: Cost 2 vsldoi4 RHS, <7,0,1,2>
+ 497620782U, // <5,u,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497623142U, // <5,u,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631622958U, // <5,u,u,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 1680496997U, // <5,u,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1228963996U, // <5,u,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 497626441U, // <5,u,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 296144182U, // <5,u,u,5>: Cost 1 vspltisw1 RHS
+ 1680497037U, // <5,u,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1228967240U, // <5,u,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497628974U, // <5,u,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 2772451328U, // <6,0,0,0>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,0,0>
+ 2772451338U, // <6,0,0,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,1,1>
+ 3771146417U, // <6,0,0,2>: Cost 4 vsldoi8 <2,1,6,0>, <0,2,1,6>
+ 3383095739U, // <6,0,0,3>: Cost 4 vmrglw <4,5,6,0>, <6,2,0,3>
+ 3846193189U, // <6,0,0,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,0,4,1>
+ 3724832803U, // <6,0,0,5>: Cost 4 vsldoi4 <5,6,0,0>, <5,6,0,0>
+ 3383095985U, // <6,0,0,6>: Cost 4 vmrglw <4,5,6,0>, <6,5,0,6>
+ 3383096067U, // <6,0,0,7>: Cost 4 vmrglw <4,5,6,0>, <6,6,0,7>
+ 2772451401U, // <6,0,0,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,u,1>
+ 2651095142U, // <6,0,1,0>: Cost 3 vsldoi4 <5,6,0,1>, LHS
+ 2251612262U, // <6,0,1,1>: Cost 3 vmrghw <6,1,7,1>, LHS
+ 1698709606U, // <6,0,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2651097602U, // <6,0,1,3>: Cost 3 vsldoi4 <5,6,0,1>, <3,4,5,6>
+ 2651098422U, // <6,0,1,4>: Cost 3 vsldoi4 <5,6,0,1>, RHS
+ 2651099172U, // <6,0,1,5>: Cost 3 vsldoi4 <5,6,0,1>, <5,6,0,1>
+ 2657071869U, // <6,0,1,6>: Cost 3 vsldoi4 <6,6,0,1>, <6,6,0,1>
+ 3724841978U, // <6,0,1,7>: Cost 4 vsldoi4 <5,6,0,1>, <7,0,1,2>
+ 1698709660U, // <6,0,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2252292096U, // <6,0,2,0>: Cost 3 vmrghw <6,2,7,3>, <0,0,0,0>
+ 1178550374U, // <6,0,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3826655418U, // <6,0,2,2>: Cost 4 vsldoi12 <0,2,1,6>, <0,2,2,6>
+ 3777783485U, // <6,0,2,3>: Cost 4 vsldoi8 <3,2,6,0>, <2,3,2,6>
+ 2252292434U, // <6,0,2,4>: Cost 3 vmrghw <6,2,7,3>, <0,4,1,5>
+ 3785746280U, // <6,0,2,5>: Cost 4 vsldoi8 <4,5,6,0>, <2,5,3,6>
+ 2252292593U, // <6,0,2,6>: Cost 3 vmrghw <6,2,7,3>, <0,6,1,2>
+ 3736794583U, // <6,0,2,7>: Cost 4 vsldoi4 <7,6,0,2>, <7,6,0,2>
+ 1178550941U, // <6,0,2,u>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3375153152U, // <6,0,3,0>: Cost 4 vmrglw <3,2,6,3>, <0,0,0,0>
+ 2772451584U, // <6,0,3,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,1,4>
+ 3777784163U, // <6,0,3,2>: Cost 4 vsldoi8 <3,2,6,0>, <3,2,6,0>
+ 3846193426U, // <6,0,3,3>: Cost 4 vsldoi12 <3,4,5,6>, <0,3,3,4>
+ 2712005122U, // <6,0,3,4>: Cost 3 vsldoi8 <4,5,6,0>, <3,4,5,6>
+ 3724857382U, // <6,0,3,5>: Cost 4 vsldoi4 <5,6,0,3>, <5,6,0,3>
+ 3802335864U, // <6,0,3,6>: Cost 4 vsldoi8 <7,3,6,0>, <3,6,0,7>
+ 3801672410U, // <6,0,3,7>: Cost 4 vsldoi8 <7,2,6,0>, <3,7,2,6>
+ 2772451647U, // <6,0,3,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,u,4>
+ 3383123968U, // <6,0,4,0>: Cost 4 vmrglw <4,5,6,4>, <0,0,0,0>
+ 2772451666U, // <6,0,4,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,1,5>
+ 3773803577U, // <6,0,4,2>: Cost 4 vsldoi8 <2,5,6,0>, <4,2,5,6>
+ 3724864002U, // <6,0,4,3>: Cost 4 vsldoi4 <5,6,0,4>, <3,4,5,6>
+ 3846193517U, // <6,0,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,4,4,5>
+ 2712005935U, // <6,0,4,5>: Cost 3 vsldoi8 <4,5,6,0>, <4,5,6,0>
+ 3327009265U, // <6,0,4,6>: Cost 4 vmrghw <6,4,2,5>, <0,6,1,2>
+ 3383126648U, // <6,0,4,7>: Cost 5 vmrglw <4,5,6,4>, <3,6,0,7>
+ 2772451729U, // <6,0,4,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,u,5>
+ 3373178880U, // <6,0,5,0>: Cost 4 vmrglw <2,u,6,5>, <0,0,0,0>
+ 2254266470U, // <6,0,5,1>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 3785748248U, // <6,0,5,2>: Cost 4 vsldoi8 <4,5,6,0>, <5,2,6,3>
+ 3790393190U, // <6,0,5,3>: Cost 4 vsldoi8 <5,3,6,0>, <5,3,6,0>
+ 3328000338U, // <6,0,5,4>: Cost 4 vmrghw <6,5,7,0>, <0,4,1,5>
+ 3785748494U, // <6,0,5,5>: Cost 4 vsldoi8 <4,5,6,0>, <5,5,6,6>
+ 3785748516U, // <6,0,5,6>: Cost 4 vsldoi8 <4,5,6,0>, <5,6,0,1>
+ 3379153528U, // <6,0,5,7>: Cost 4 vmrglw <3,u,6,5>, <3,6,0,7>
+ 2254267037U, // <6,0,5,u>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 2254897152U, // <6,0,6,0>: Cost 3 vmrghw <6,6,6,6>, <0,0,0,0>
+ 1181155430U, // <6,0,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 3785748923U, // <6,0,6,2>: Cost 4 vsldoi8 <4,5,6,0>, <6,2,0,3>
+ 3785749042U, // <6,0,6,3>: Cost 4 vsldoi8 <4,5,6,0>, <6,3,4,5>
+ 2254897490U, // <6,0,6,4>: Cost 3 vmrghw <6,6,6,6>, <0,4,1,5>
+ 3785749169U, // <6,0,6,5>: Cost 4 vsldoi8 <4,5,6,0>, <6,5,0,6>
+ 2724614962U, // <6,0,6,6>: Cost 3 vsldoi8 <6,6,6,0>, <6,6,6,0>
+ 3787739982U, // <6,0,6,7>: Cost 4 vsldoi8 <4,u,6,0>, <6,7,0,1>
+ 1181155997U, // <6,0,6,u>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1235664896U, // <6,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235666598U, // <6,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 3712943720U, // <6,0,7,2>: Cost 4 vsldoi4 <3,6,0,7>, <2,2,2,2>
+ 2639202936U, // <6,0,7,3>: Cost 3 vsldoi4 <3,6,0,7>, <3,6,0,7>
+ 2639203638U, // <6,0,7,4>: Cost 3 vsldoi4 <3,6,0,7>, RHS
+ 2309409236U, // <6,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 3712946517U, // <6,0,7,6>: Cost 4 vsldoi4 <3,6,0,7>, <6,0,7,0>
+ 2309409400U, // <6,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235666605U, // <6,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 1235673088U, // <6,0,u,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235674790U, // <6,0,u,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 1698710173U, // <6,0,u,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2639211129U, // <6,0,u,3>: Cost 3 vsldoi4 <3,6,0,u>, <3,6,0,u>
+ 2639211830U, // <6,0,u,4>: Cost 3 vsldoi4 <3,6,0,u>, RHS
+ 2712008858U, // <6,0,u,5>: Cost 3 vsldoi8 <4,5,6,0>, RHS
+ 2657129220U, // <6,0,u,6>: Cost 3 vsldoi4 <6,6,0,u>, <6,6,0,u>
+ 2309417592U, // <6,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1698710227U, // <6,0,u,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 3775799296U, // <6,1,0,0>: Cost 4 vsldoi8 <2,u,6,1>, <0,0,0,0>
+ 2702057574U, // <6,1,0,1>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3373143763U, // <6,1,0,2>: Cost 4 vmrglw <2,u,6,0>, <u,0,1,2>
+ 3695045122U, // <6,1,0,3>: Cost 4 vsldoi4 <0,6,1,0>, <3,4,5,6>
+ 3775799634U, // <6,1,0,4>: Cost 4 vsldoi8 <2,u,6,1>, <0,4,1,5>
+ 3383091538U, // <6,1,0,5>: Cost 4 vmrglw <4,5,6,0>, <0,4,1,5>
+ 3368493233U, // <6,1,0,6>: Cost 4 vmrglw <2,1,6,0>, <0,2,1,6>
+ 3362522319U, // <6,1,0,7>: Cost 5 vmrglw <1,1,6,0>, <1,6,1,7>
+ 2702058141U, // <6,1,0,u>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3834250027U, // <6,1,1,0>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,0,1>
+ 2772452148U, // <6,1,1,1>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 3832038210U, // <6,1,1,2>: Cost 4 vsldoi12 <1,1,2,6>, <1,1,2,6>
+ 3373150660U, // <6,1,1,3>: Cost 4 vmrglw <2,u,6,1>, <6,2,1,3>
+ 3834250067U, // <6,1,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,4,5>
+ 3373146450U, // <6,1,1,5>: Cost 4 vmrglw <2,u,6,1>, <0,4,1,5>
+ 3826656102U, // <6,1,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,1,6,6>
+ 3362530511U, // <6,1,1,7>: Cost 4 vmrglw <1,1,6,1>, <1,6,1,7>
+ 2772452148U, // <6,1,1,u>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 2669092966U, // <6,1,2,0>: Cost 3 vsldoi4 <u,6,1,2>, LHS
+ 2252292916U, // <6,1,2,1>: Cost 3 vmrghw <6,2,7,3>, <1,1,1,1>
+ 2252293014U, // <6,1,2,2>: Cost 3 vmrghw <6,2,7,3>, <1,2,3,0>
+ 2772452246U, // <6,1,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,2,3,0>
+ 2669096246U, // <6,1,2,4>: Cost 3 vsldoi4 <u,6,1,2>, RHS
+ 3846194091U, // <6,1,2,5>: Cost 4 vsldoi12 <3,4,5,6>, <1,2,5,3>
+ 2702059450U, // <6,1,2,6>: Cost 3 vsldoi8 <2,u,6,1>, <2,6,3,7>
+ 3870081978U, // <6,1,2,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,2,7,0>
+ 2702059633U, // <6,1,2,u>: Cost 3 vsldoi8 <2,u,6,1>, <2,u,6,1>
+ 3775801494U, // <6,1,3,0>: Cost 4 vsldoi8 <2,u,6,1>, <3,0,1,2>
+ 3777128723U, // <6,1,3,1>: Cost 4 vsldoi8 <3,1,6,1>, <3,1,6,1>
+ 3775801702U, // <6,1,3,2>: Cost 4 vsldoi8 <2,u,6,1>, <3,2,6,3>
+ 3775801756U, // <6,1,3,3>: Cost 4 vsldoi8 <2,u,6,1>, <3,3,3,3>
+ 3775801858U, // <6,1,3,4>: Cost 4 vsldoi8 <2,u,6,1>, <3,4,5,6>
+ 3375153490U, // <6,1,3,5>: Cost 4 vmrglw <3,2,6,3>, <0,4,1,5>
+ 3826656265U, // <6,1,3,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,3,6,7>
+ 3775802051U, // <6,1,3,7>: Cost 4 vsldoi8 <2,u,6,1>, <3,7,0,1>
+ 3775802142U, // <6,1,3,u>: Cost 4 vsldoi8 <2,u,6,1>, <3,u,1,2>
+ 3846194206U, // <6,1,4,0>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,0,1>
+ 3846194219U, // <6,1,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,1,5>
+ 3846194228U, // <6,1,4,2>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,2,5>
+ 3846194236U, // <6,1,4,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,3,4>
+ 3846194246U, // <6,1,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,4,5>
+ 2760508496U, // <6,1,4,5>: Cost 3 vsldoi12 <1,4,5,6>, <1,4,5,6>
+ 3368526001U, // <6,1,4,6>: Cost 4 vmrglw <2,1,6,4>, <0,2,1,6>
+ 3870082144U, // <6,1,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,4,7,4>
+ 2760729707U, // <6,1,4,u>: Cost 3 vsldoi12 <1,4,u,6>, <1,4,u,6>
+ 2714668660U, // <6,1,5,0>: Cost 3 vsldoi8 <5,0,6,1>, <5,0,6,1>
+ 3834619005U, // <6,1,5,1>: Cost 4 vsldoi12 <1,5,1,6>, <1,5,1,6>
+ 3834692742U, // <6,1,5,2>: Cost 4 vsldoi12 <1,5,2,6>, <1,5,2,6>
+ 3846194317U, // <6,1,5,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,5,3,4>
+ 3834840216U, // <6,1,5,4>: Cost 4 vsldoi12 <1,5,4,6>, <1,5,4,6>
+ 3834913953U, // <6,1,5,5>: Cost 4 vsldoi12 <1,5,5,6>, <1,5,5,6>
+ 2719977570U, // <6,1,5,6>: Cost 3 vsldoi8 <5,u,6,1>, <5,6,7,0>
+ 3367208143U, // <6,1,5,7>: Cost 4 vmrglw <1,u,6,5>, <1,6,1,7>
+ 2719977724U, // <6,1,5,u>: Cost 3 vsldoi8 <5,u,6,1>, <5,u,6,1>
+ 2669125734U, // <6,1,6,0>: Cost 3 vsldoi4 <u,6,1,6>, LHS
+ 2254897972U, // <6,1,6,1>: Cost 3 vmrghw <6,6,6,6>, <1,1,1,1>
+ 2254898070U, // <6,1,6,2>: Cost 3 vmrghw <6,6,6,6>, <1,2,3,0>
+ 3775803929U, // <6,1,6,3>: Cost 4 vsldoi8 <2,u,6,1>, <6,3,1,7>
+ 2669129014U, // <6,1,6,4>: Cost 3 vsldoi4 <u,6,1,6>, RHS
+ 2322006354U, // <6,1,6,5>: Cost 3 vmrglw <6,6,6,6>, <0,4,1,5>
+ 2725950264U, // <6,1,6,6>: Cost 3 vsldoi8 <6,u,6,1>, <6,6,6,6>
+ 3793720142U, // <6,1,6,7>: Cost 4 vsldoi8 <5,u,6,1>, <6,7,0,1>
+ 2254898556U, // <6,1,6,u>: Cost 3 vmrghw <6,6,6,6>, <1,u,3,0>
+ 2627330150U, // <6,1,7,0>: Cost 3 vsldoi4 <1,6,1,7>, LHS
+ 1235664906U, // <6,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235667094U, // <6,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309406894U, // <6,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2627333430U, // <6,1,7,4>: Cost 3 vsldoi4 <1,6,1,7>, RHS
+ 1235665234U, // <6,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309406897U, // <6,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309407222U, // <6,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235664913U, // <6,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 2627338342U, // <6,1,u,0>: Cost 3 vsldoi4 <1,6,1,u>, LHS
+ 1235673098U, // <6,1,u,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235675286U, // <6,1,u,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2772452732U, // <6,1,u,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,u,3,0>
+ 2627341622U, // <6,1,u,4>: Cost 3 vsldoi4 <1,6,1,u>, RHS
+ 1235673426U, // <6,1,u,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309415089U, // <6,1,u,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309415414U, // <6,1,u,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235673105U, // <6,1,u,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 3324683725U, // <6,2,0,0>: Cost 4 vmrghw <6,0,7,0>, <2,0,3,0>
+ 2725290086U, // <6,2,0,1>: Cost 3 vsldoi8 <6,7,6,2>, LHS
+ 3771162801U, // <6,2,0,2>: Cost 4 vsldoi8 <2,1,6,2>, <0,2,1,6>
+ 2309349478U, // <6,2,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3730951478U, // <6,2,0,4>: Cost 4 vsldoi4 <6,6,2,0>, RHS
+ 3840738784U, // <6,2,0,5>: Cost 4 vsldoi12 <2,5,3,6>, <2,0,5,1>
+ 3842655721U, // <6,2,0,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,0,6,1>
+ 3736925671U, // <6,2,0,7>: Cost 4 vsldoi4 <7,6,2,0>, <7,6,2,0>
+ 2309349483U, // <6,2,0,u>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3367840468U, // <6,2,1,0>: Cost 4 vmrglw <2,0,6,1>, <3,7,2,0>
+ 3325355551U, // <6,2,1,1>: Cost 4 vmrghw <6,1,7,1>, <2,1,3,1>
+ 3373147752U, // <6,2,1,2>: Cost 4 vmrglw <2,u,6,1>, <2,2,2,2>
+ 2299404390U, // <6,2,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 3701099830U, // <6,2,1,4>: Cost 5 vsldoi4 <1,6,2,1>, RHS
+ 3767846054U, // <6,2,1,5>: Cost 4 vsldoi8 <1,5,6,2>, <1,5,6,2>
+ 3826656825U, // <6,2,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <2,1,6,0>
+ 3373147838U, // <6,2,1,7>: Cost 5 vmrglw <2,u,6,1>, <2,3,2,7>
+ 2299404395U, // <6,2,1,u>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2657222758U, // <6,2,2,0>: Cost 3 vsldoi4 <6,6,2,2>, LHS
+ 3771164219U, // <6,2,2,1>: Cost 4 vsldoi8 <2,1,6,2>, <2,1,6,2>
+ 2766481000U, // <6,2,2,2>: Cost 3 vsldoi12 <2,4,5,6>, <2,2,2,2>
+ 2772452978U, // <6,2,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,3,3>
+ 2657226038U, // <6,2,2,4>: Cost 3 vsldoi4 <6,6,2,2>, RHS
+ 3790407528U, // <6,2,2,5>: Cost 4 vsldoi8 <5,3,6,2>, <2,5,3,6>
+ 2252294074U, // <6,2,2,6>: Cost 3 vmrghw <6,2,7,3>, <2,6,3,7>
+ 2252294148U, // <6,2,2,7>: Cost 3 vmrghw <6,2,7,3>, <2,7,3,0>
+ 2772453023U, // <6,2,2,u>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,u,3>
+ 2772453030U, // <6,2,3,0>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,0,1>
+ 3834250930U, // <6,2,3,1>: Cost 4 vsldoi12 <1,4,5,6>, <2,3,1,4>
+ 2765596349U, // <6,2,3,2>: Cost 3 vsldoi12 <2,3,2,6>, <2,3,2,6>
+ 2301411430U, // <6,2,3,3>: Cost 3 vmrglw <3,2,6,3>, LHS
+ 2772453070U, // <6,2,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,4,5>
+ 2765817560U, // <6,2,3,5>: Cost 3 vsldoi12 <2,3,5,6>, <2,3,5,6>
+ 2252933050U, // <6,2,3,6>: Cost 3 vmrghw <6,3,7,0>, <2,6,3,7>
+ 2796340968U, // <6,2,3,7>: Cost 3 vsldoi12 <7,4,5,6>, <2,3,7,4>
+ 2766038771U, // <6,2,3,u>: Cost 3 vsldoi12 <2,3,u,6>, <2,3,u,6>
+ 3725008998U, // <6,2,4,0>: Cost 4 vsldoi4 <5,6,2,4>, LHS
+ 3368530217U, // <6,2,4,1>: Cost 5 vmrglw <2,1,6,4>, <6,0,2,1>
+ 3840222989U, // <6,2,4,2>: Cost 4 vsldoi12 <2,4,5,6>, <2,4,2,5>
+ 2309382246U, // <6,2,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 3725012278U, // <6,2,4,4>: Cost 4 vsldoi4 <5,6,2,4>, RHS
+ 2766481193U, // <6,2,4,5>: Cost 3 vsldoi12 <2,4,5,6>, <2,4,5,6>
+ 3842656049U, // <6,2,4,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,4,6,5>
+ 3327010820U, // <6,2,4,7>: Cost 4 vmrghw <6,4,2,5>, <2,7,3,0>
+ 2766702404U, // <6,2,4,u>: Cost 3 vsldoi12 <2,4,u,6>, <2,4,u,6>
+ 3713073254U, // <6,2,5,0>: Cost 4 vsldoi4 <3,6,2,5>, LHS
+ 3789082310U, // <6,2,5,1>: Cost 4 vsldoi8 <5,1,6,2>, <5,1,6,2>
+ 3840665439U, // <6,2,5,2>: Cost 4 vsldoi12 <2,5,2,6>, <2,5,2,6>
+ 2766997352U, // <6,2,5,3>: Cost 3 vsldoi12 <2,5,3,6>, <2,5,3,6>
+ 3713076534U, // <6,2,5,4>: Cost 4 vsldoi4 <3,6,2,5>, RHS
+ 3791736842U, // <6,2,5,5>: Cost 4 vsldoi8 <5,5,6,2>, <5,5,6,2>
+ 3373180605U, // <6,2,5,6>: Cost 4 vmrglw <2,u,6,5>, <2,3,2,6>
+ 3793064108U, // <6,2,5,7>: Cost 4 vsldoi8 <5,7,6,2>, <5,7,6,2>
+ 2767366037U, // <6,2,5,u>: Cost 3 vsldoi12 <2,5,u,6>, <2,5,u,6>
+ 3701137510U, // <6,2,6,0>: Cost 4 vsldoi4 <1,6,2,6>, LHS
+ 3701138647U, // <6,2,6,1>: Cost 4 vsldoi4 <1,6,2,6>, <1,6,2,6>
+ 2254898792U, // <6,2,6,2>: Cost 3 vmrghw <6,6,6,6>, <2,2,2,2>
+ 1248264294U, // <6,2,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 3701140790U, // <6,2,6,4>: Cost 4 vsldoi4 <1,6,2,6>, RHS
+ 3725029435U, // <6,2,6,5>: Cost 4 vsldoi4 <5,6,2,6>, <5,6,2,6>
+ 2254899130U, // <6,2,6,6>: Cost 3 vmrghw <6,6,6,6>, <2,6,3,7>
+ 2725294981U, // <6,2,6,7>: Cost 3 vsldoi8 <6,7,6,2>, <6,7,6,2>
+ 1248264299U, // <6,2,6,u>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 2633375846U, // <6,2,7,0>: Cost 3 vsldoi4 <2,6,2,7>, LHS
+ 2309407468U, // <6,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235666536U, // <6,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161923174U, // <6,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2633379126U, // <6,2,7,4>: Cost 3 vsldoi4 <2,6,2,7>, RHS
+ 2309407796U, // <6,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309408445U, // <6,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309407960U, // <6,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161923179U, // <6,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 2633384038U, // <6,2,u,0>: Cost 3 vsldoi4 <2,6,2,u>, LHS
+ 2309415660U, // <6,2,u,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235674728U, // <6,2,u,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161931366U, // <6,2,u,3>: Cost 1 vmrglw RHS, LHS
+ 2633387318U, // <6,2,u,4>: Cost 3 vsldoi4 <2,6,2,u>, RHS
+ 2769135725U, // <6,2,u,5>: Cost 3 vsldoi12 <2,u,5,6>, <2,u,5,6>
+ 2309416637U, // <6,2,u,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309416152U, // <6,2,u,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161931371U, // <6,2,u,u>: Cost 1 vmrglw RHS, LHS
+ 3777806336U, // <6,3,0,0>: Cost 4 vsldoi8 <3,2,6,3>, <0,0,0,0>
+ 2704064614U, // <6,3,0,1>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 3765862577U, // <6,3,0,2>: Cost 4 vsldoi8 <1,2,6,3>, <0,2,1,6>
+ 3843393708U, // <6,3,0,3>: Cost 4 vsldoi12 <3,0,3,6>, <3,0,3,6>
+ 2250516994U, // <6,3,0,4>: Cost 3 vmrghw <6,0,1,2>, <3,4,5,6>
+ 3725054014U, // <6,3,0,5>: Cost 4 vsldoi4 <5,6,3,0>, <5,6,3,0>
+ 3383093096U, // <6,3,0,6>: Cost 4 vmrglw <4,5,6,0>, <2,5,3,6>
+ 3368495034U, // <6,3,0,7>: Cost 4 vmrglw <2,1,6,0>, <2,6,3,7>
+ 2704065181U, // <6,3,0,u>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 2251622550U, // <6,3,1,0>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 3777807156U, // <6,3,1,1>: Cost 4 vsldoi8 <3,2,6,3>, <1,1,1,1>
+ 3765863348U, // <6,3,1,2>: Cost 4 vsldoi8 <1,2,6,3>, <1,2,6,3>
+ 3373147762U, // <6,3,1,3>: Cost 4 vmrglw <2,u,6,1>, <2,2,3,3>
+ 3834251525U, // <6,3,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <3,1,4,5>
+ 3373147683U, // <6,3,1,5>: Cost 5 vmrglw <2,u,6,1>, <2,1,3,5>
+ 3391727545U, // <6,3,1,6>: Cost 4 vmrglw <6,0,6,1>, <2,6,3,6>
+ 2299406266U, // <6,3,1,7>: Cost 3 vmrglw <2,u,6,1>, <2,6,3,7>
+ 2251622550U, // <6,3,1,u>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 2252294294U, // <6,3,2,0>: Cost 3 vmrghw <6,2,7,3>, <3,0,1,2>
+ 3326036198U, // <6,3,2,1>: Cost 4 vmrghw <6,2,7,3>, <3,1,1,1>
+ 3771836045U, // <6,3,2,2>: Cost 4 vsldoi8 <2,2,6,3>, <2,2,6,3>
+ 2252294556U, // <6,3,2,3>: Cost 3 vmrghw <6,2,7,3>, <3,3,3,3>
+ 2252294658U, // <6,3,2,4>: Cost 3 vmrghw <6,2,7,3>, <3,4,5,6>
+ 3840739677U, // <6,3,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <3,2,5,3>
+ 2704066490U, // <6,3,2,6>: Cost 3 vsldoi8 <3,2,6,3>, <2,6,3,7>
+ 3368511418U, // <6,3,2,7>: Cost 4 vmrglw <2,1,6,2>, <2,6,3,7>
+ 2252294942U, // <6,3,2,u>: Cost 3 vmrghw <6,2,7,3>, <3,u,1,2>
+ 3707158630U, // <6,3,3,0>: Cost 4 vsldoi4 <2,6,3,3>, LHS
+ 3765864692U, // <6,3,3,1>: Cost 5 vsldoi8 <1,2,6,3>, <3,1,2,6>
+ 2704066918U, // <6,3,3,2>: Cost 3 vsldoi8 <3,2,6,3>, <3,2,6,3>
+ 2772453788U, // <6,3,3,3>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,3,3>
+ 2772453799U, // <6,3,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,4,5>
+ 3789752888U, // <6,3,3,5>: Cost 4 vsldoi8 <5,2,6,3>, <3,5,2,6>
+ 3840739770U, // <6,3,3,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,3,6,6>
+ 2301413306U, // <6,3,3,7>: Cost 3 vmrglw <3,2,6,3>, <2,6,3,7>
+ 2775108043U, // <6,3,3,u>: Cost 3 vsldoi12 <3,u,5,6>, <3,3,u,5>
+ 2651340902U, // <6,3,4,0>: Cost 3 vsldoi4 <5,6,3,4>, LHS
+ 3846195674U, // <6,3,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <3,4,1,2>
+ 3845974503U, // <6,3,4,2>: Cost 4 vsldoi12 <3,4,2,6>, <3,4,2,6>
+ 2651343362U, // <6,3,4,3>: Cost 3 vsldoi4 <5,6,3,4>, <3,4,5,6>
+ 2651344182U, // <6,3,4,4>: Cost 3 vsldoi4 <5,6,3,4>, RHS
+ 1698712066U, // <6,3,4,5>: Cost 2 vsldoi12 <3,4,5,6>, <3,4,5,6>
+ 3383125864U, // <6,3,4,6>: Cost 4 vmrglw <4,5,6,4>, <2,5,3,6>
+ 3368527802U, // <6,3,4,7>: Cost 4 vmrglw <2,1,6,4>, <2,6,3,7>
+ 1698933277U, // <6,3,4,u>: Cost 2 vsldoi12 <3,4,u,6>, <3,4,u,6>
+ 3373179798U, // <6,3,5,0>: Cost 4 vmrglw <2,u,6,5>, <1,2,3,0>
+ 3707176179U, // <6,3,5,1>: Cost 5 vsldoi4 <2,6,3,5>, <1,6,5,7>
+ 2716012312U, // <6,3,5,2>: Cost 3 vsldoi8 <5,2,6,3>, <5,2,6,3>
+ 3373180530U, // <6,3,5,3>: Cost 4 vmrglw <2,u,6,5>, <2,2,3,3>
+ 2254309890U, // <6,3,5,4>: Cost 3 vmrghw <6,5,7,6>, <3,4,5,6>
+ 3785773070U, // <6,3,5,5>: Cost 4 vsldoi8 <4,5,6,3>, <5,5,6,6>
+ 3840739932U, // <6,3,5,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,5,6,6>
+ 2299439034U, // <6,3,5,7>: Cost 3 vmrglw <2,u,6,5>, <2,6,3,7>
+ 2719994110U, // <6,3,5,u>: Cost 3 vsldoi8 <5,u,6,3>, <5,u,6,3>
+ 2254899350U, // <6,3,6,0>: Cost 3 vmrghw <6,6,6,6>, <3,0,1,2>
+ 3328641254U, // <6,3,6,1>: Cost 4 vmrghw <6,6,6,6>, <3,1,1,1>
+ 2633443257U, // <6,3,6,2>: Cost 3 vsldoi4 <2,6,3,6>, <2,6,3,6>
+ 2254899612U, // <6,3,6,3>: Cost 3 vmrghw <6,6,6,6>, <3,3,3,3>
+ 2254899714U, // <6,3,6,4>: Cost 3 vmrghw <6,6,6,6>, <3,4,5,6>
+ 3785773772U, // <6,3,6,5>: Cost 4 vsldoi8 <4,5,6,3>, <6,5,3,6>
+ 2725966648U, // <6,3,6,6>: Cost 3 vsldoi8 <6,u,6,3>, <6,6,6,6>
+ 2322007994U, // <6,3,6,7>: Cost 3 vmrglw <6,6,6,6>, <2,6,3,7>
+ 2254899998U, // <6,3,6,u>: Cost 3 vmrghw <6,6,6,6>, <3,u,1,2>
+ 1559707750U, // <6,3,7,0>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 2633450292U, // <6,3,7,1>: Cost 3 vsldoi4 <2,6,3,7>, <1,1,1,1>
+ 1559709626U, // <6,3,7,2>: Cost 2 vsldoi4 <2,6,3,7>, <2,6,3,7>
+ 1235666546U, // <6,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559711030U, // <6,3,7,4>: Cost 2 vsldoi4 <2,6,3,7>, RHS
+ 2309408291U, // <6,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2633454152U, // <6,3,7,6>: Cost 3 vsldoi4 <2,6,3,7>, <6,3,7,0>
+ 1235666874U, // <6,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559713582U, // <6,3,7,u>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 1559715942U, // <6,3,u,0>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 2633458484U, // <6,3,u,1>: Cost 3 vsldoi4 <2,6,3,u>, <1,1,1,1>
+ 1559717819U, // <6,3,u,2>: Cost 2 vsldoi4 <2,6,3,u>, <2,6,3,u>
+ 1235674738U, // <6,3,u,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559719222U, // <6,3,u,4>: Cost 2 vsldoi4 <2,6,3,u>, RHS
+ 1701366598U, // <6,3,u,5>: Cost 2 vsldoi12 <3,u,5,6>, <3,u,5,6>
+ 2633462353U, // <6,3,u,6>: Cost 3 vsldoi4 <2,6,3,u>, <6,3,u,0>
+ 1235675066U, // <6,3,u,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559721774U, // <6,3,u,u>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 3785777152U, // <6,4,0,0>: Cost 4 vsldoi8 <4,5,6,4>, <0,0,0,0>
+ 2712035430U, // <6,4,0,1>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3771179185U, // <6,4,0,2>: Cost 4 vsldoi8 <2,1,6,4>, <0,2,1,6>
+ 3846196096U, // <6,4,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <4,0,3,1>
+ 3785777490U, // <6,4,0,4>: Cost 4 vsldoi8 <4,5,6,4>, <0,4,1,5>
+ 2250517814U, // <6,4,0,5>: Cost 3 vmrghw <6,0,1,2>, RHS
+ 3324259703U, // <6,4,0,6>: Cost 4 vmrghw <6,0,1,2>, <4,6,5,0>
+ 3383092458U, // <6,4,0,7>: Cost 5 vmrglw <4,5,6,0>, <1,6,4,7>
+ 2712035997U, // <6,4,0,u>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3325356946U, // <6,4,1,0>: Cost 4 vmrghw <6,1,7,1>, <4,0,5,1>
+ 3785777972U, // <6,4,1,1>: Cost 4 vsldoi8 <4,5,6,4>, <1,1,1,1>
+ 3846196170U, // <6,4,1,2>: Cost 4 vsldoi12 <3,4,5,6>, <4,1,2,3>
+ 3325365380U, // <6,4,1,3>: Cost 4 vmrghw <6,1,7,2>, <4,3,5,0>
+ 3852168155U, // <6,4,1,4>: Cost 4 vsldoi12 <4,4,5,6>, <4,1,4,2>
+ 2251615542U, // <6,4,1,5>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 3325357432U, // <6,4,1,6>: Cost 4 vmrghw <6,1,7,1>, <4,6,5,1>
+ 3870084088U, // <6,4,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <4,1,7,4>
+ 2251615785U, // <6,4,1,u>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 2252295058U, // <6,4,2,0>: Cost 3 vmrghw <6,2,7,3>, <4,0,5,1>
+ 3771180605U, // <6,4,2,1>: Cost 4 vsldoi8 <2,1,6,4>, <2,1,6,4>
+ 3785778792U, // <6,4,2,2>: Cost 4 vsldoi8 <4,5,6,4>, <2,2,2,2>
+ 3777816253U, // <6,4,2,3>: Cost 4 vsldoi8 <3,2,6,4>, <2,3,2,6>
+ 2252295376U, // <6,4,2,4>: Cost 3 vmrghw <6,2,7,3>, <4,4,4,4>
+ 1178553654U, // <6,4,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 2252295545U, // <6,4,2,6>: Cost 3 vmrghw <6,2,7,3>, <4,6,5,2>
+ 3326037448U, // <6,4,2,7>: Cost 4 vmrghw <6,2,7,3>, <4,7,5,0>
+ 1178553897U, // <6,4,2,u>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 3785779350U, // <6,4,3,0>: Cost 4 vsldoi8 <4,5,6,4>, <3,0,1,2>
+ 3383118648U, // <6,4,3,1>: Cost 4 vmrglw <4,5,6,3>, <3,u,4,1>
+ 3777816935U, // <6,4,3,2>: Cost 4 vsldoi8 <3,2,6,4>, <3,2,6,4>
+ 3785779612U, // <6,4,3,3>: Cost 4 vsldoi8 <4,5,6,4>, <3,3,3,3>
+ 2712037890U, // <6,4,3,4>: Cost 3 vsldoi8 <4,5,6,4>, <3,4,5,6>
+ 2252754230U, // <6,4,3,5>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3784452764U, // <6,4,3,6>: Cost 4 vsldoi8 <4,3,6,4>, <3,6,4,7>
+ 3801705178U, // <6,4,3,7>: Cost 4 vsldoi8 <7,2,6,4>, <3,7,2,6>
+ 2252754473U, // <6,4,3,u>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3787770770U, // <6,4,4,0>: Cost 4 vsldoi8 <4,u,6,4>, <4,0,5,1>
+ 3383126840U, // <6,4,4,1>: Cost 4 vmrglw <4,5,6,4>, <3,u,4,1>
+ 3327380534U, // <6,4,4,2>: Cost 4 vmrghw <6,4,7,5>, <4,2,5,3>
+ 3784453265U, // <6,4,4,3>: Cost 4 vsldoi8 <4,3,6,4>, <4,3,6,4>
+ 2253630672U, // <6,4,4,4>: Cost 3 vmrghw <6,4,7,4>, <4,4,4,4>
+ 2778426587U, // <6,4,4,5>: Cost 3 vsldoi12 <4,4,5,6>, <4,4,5,6>
+ 3383128789U, // <6,4,4,6>: Cost 4 vmrglw <4,5,6,4>, <6,5,4,6>
+ 3381799580U, // <6,4,4,7>: Cost 4 vmrglw <4,3,6,4>, <3,6,4,7>
+ 2778647798U, // <6,4,4,u>: Cost 3 vsldoi12 <4,4,u,6>, <4,4,u,6>
+ 2651422822U, // <6,4,5,0>: Cost 3 vsldoi4 <5,6,4,5>, LHS
+ 3701277928U, // <6,4,5,1>: Cost 4 vsldoi4 <1,6,4,5>, <1,6,4,5>
+ 3701278650U, // <6,4,5,2>: Cost 4 vsldoi4 <1,6,4,5>, <2,6,3,7>
+ 2651425282U, // <6,4,5,3>: Cost 3 vsldoi4 <5,6,4,5>, <3,4,5,6>
+ 2651426102U, // <6,4,5,4>: Cost 3 vsldoi4 <5,6,4,5>, RHS
+ 2651426892U, // <6,4,5,5>: Cost 3 vsldoi4 <5,6,4,5>, <5,6,4,5>
+ 1698712886U, // <6,4,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3725169658U, // <6,4,5,7>: Cost 4 vsldoi4 <5,6,4,5>, <7,0,1,2>
+ 1698712904U, // <6,4,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2254900114U, // <6,4,6,0>: Cost 3 vmrghw <6,6,6,6>, <4,0,5,1>
+ 3389115192U, // <6,4,6,1>: Cost 4 vmrglw <5,5,6,6>, <3,u,4,1>
+ 3785781727U, // <6,4,6,2>: Cost 4 vsldoi8 <4,5,6,4>, <6,2,4,3>
+ 3785781810U, // <6,4,6,3>: Cost 4 vsldoi8 <4,5,6,4>, <6,3,4,5>
+ 2254900432U, // <6,4,6,4>: Cost 3 vmrghw <6,6,6,6>, <4,4,4,4>
+ 1181158710U, // <6,4,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2254900605U, // <6,4,6,6>: Cost 3 vmrghw <6,6,6,6>, <4,6,5,6>
+ 3787772750U, // <6,4,6,7>: Cost 4 vsldoi8 <4,u,6,4>, <6,7,0,1>
+ 1181158953U, // <6,4,6,u>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2639495270U, // <6,4,7,0>: Cost 3 vsldoi4 <3,6,4,7>, LHS
+ 2639496090U, // <6,4,7,1>: Cost 3 vsldoi4 <3,6,4,7>, <1,2,3,4>
+ 3707267011U, // <6,4,7,2>: Cost 4 vsldoi4 <2,6,4,7>, <2,6,4,7>
+ 2639497884U, // <6,4,7,3>: Cost 3 vsldoi4 <3,6,4,7>, <3,6,4,7>
+ 1237658832U, // <6,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235666638U, // <6,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 3713241753U, // <6,4,7,6>: Cost 4 vsldoi4 <3,6,4,7>, <6,4,7,0>
+ 2309409436U, // <6,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235666641U, // <6,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 2639503462U, // <6,4,u,0>: Cost 3 vsldoi4 <3,6,4,u>, LHS
+ 2639504282U, // <6,4,u,1>: Cost 3 vsldoi4 <3,6,4,u>, <1,2,3,4>
+ 3701303226U, // <6,4,u,2>: Cost 4 vsldoi4 <1,6,4,u>, <2,6,3,7>
+ 2639506077U, // <6,4,u,3>: Cost 3 vsldoi4 <3,6,4,u>, <3,6,4,u>
+ 1235676368U, // <6,4,u,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235674830U, // <6,4,u,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 1698713129U, // <6,4,u,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2309417628U, // <6,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1698713147U, // <6,4,u,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3775832064U, // <6,5,0,0>: Cost 4 vsldoi8 <2,u,6,5>, <0,0,0,0>
+ 2702090342U, // <6,5,0,1>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3775832241U, // <6,5,0,2>: Cost 4 vsldoi8 <2,u,6,5>, <0,2,1,6>
+ 3719227906U, // <6,5,0,3>: Cost 4 vsldoi4 <4,6,5,0>, <3,4,5,6>
+ 3775832402U, // <6,5,0,4>: Cost 4 vsldoi8 <2,u,6,5>, <0,4,1,5>
+ 3385085146U, // <6,5,0,5>: Cost 4 vmrglw <4,u,6,0>, <4,4,5,5>
+ 2309351938U, // <6,5,0,6>: Cost 3 vmrglw <4,5,6,0>, <3,4,5,6>
+ 3376459134U, // <6,5,0,7>: Cost 5 vmrglw <3,4,6,0>, <4,6,5,7>
+ 2702090909U, // <6,5,0,u>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3719233546U, // <6,5,1,0>: Cost 4 vsldoi4 <4,6,5,1>, <0,0,1,1>
+ 3775832884U, // <6,5,1,1>: Cost 4 vsldoi8 <2,u,6,5>, <1,1,1,1>
+ 3775832982U, // <6,5,1,2>: Cost 4 vsldoi8 <2,u,6,5>, <1,2,3,0>
+ 3846196909U, // <6,5,1,3>: Cost 4 vsldoi12 <3,4,5,6>, <5,1,3,4>
+ 3719236984U, // <6,5,1,4>: Cost 4 vsldoi4 <4,6,5,1>, <4,6,5,1>
+ 3856150209U, // <6,5,1,5>: Cost 4 vsldoi12 <5,1,5,6>, <5,1,5,6>
+ 3834252997U, // <6,5,1,6>: Cost 4 vsldoi12 <1,4,5,6>, <5,1,6,1>
+ 3870084817U, // <6,5,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,1,7,4>
+ 3769861532U, // <6,5,1,u>: Cost 4 vsldoi8 <1,u,6,5>, <1,u,6,5>
+ 2645500006U, // <6,5,2,0>: Cost 3 vsldoi4 <4,6,5,2>, LHS
+ 3719242548U, // <6,5,2,1>: Cost 4 vsldoi4 <4,6,5,2>, <1,1,1,1>
+ 3775833704U, // <6,5,2,2>: Cost 4 vsldoi8 <2,u,6,5>, <2,2,2,2>
+ 3775833766U, // <6,5,2,3>: Cost 4 vsldoi8 <2,u,6,5>, <2,3,0,1>
+ 2645503353U, // <6,5,2,4>: Cost 3 vsldoi4 <4,6,5,2>, <4,6,5,2>
+ 2252296196U, // <6,5,2,5>: Cost 3 vmrghw <6,2,7,3>, <5,5,5,5>
+ 2702092218U, // <6,5,2,6>: Cost 3 vsldoi8 <2,u,6,5>, <2,6,3,7>
+ 3719246842U, // <6,5,2,7>: Cost 4 vsldoi4 <4,6,5,2>, <7,0,1,2>
+ 2702092405U, // <6,5,2,u>: Cost 3 vsldoi8 <2,u,6,5>, <2,u,6,5>
+ 3775834262U, // <6,5,3,0>: Cost 4 vsldoi8 <2,u,6,5>, <3,0,1,2>
+ 3777161495U, // <6,5,3,1>: Cost 4 vsldoi8 <3,1,6,5>, <3,1,6,5>
+ 3775834470U, // <6,5,3,2>: Cost 4 vsldoi8 <2,u,6,5>, <3,2,6,3>
+ 3775834524U, // <6,5,3,3>: Cost 4 vsldoi8 <2,u,6,5>, <3,3,3,3>
+ 3775834626U, // <6,5,3,4>: Cost 4 vsldoi8 <2,u,6,5>, <3,4,5,6>
+ 3385109722U, // <6,5,3,5>: Cost 4 vmrglw <4,u,6,3>, <4,4,5,5>
+ 2309376514U, // <6,5,3,6>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3775834819U, // <6,5,3,7>: Cost 4 vsldoi8 <2,u,6,5>, <3,7,0,1>
+ 2309376514U, // <6,5,3,u>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3719258214U, // <6,5,4,0>: Cost 4 vsldoi4 <4,6,5,4>, LHS
+ 3385117586U, // <6,5,4,1>: Cost 4 vmrglw <4,u,6,4>, <4,0,5,1>
+ 3327242008U, // <6,5,4,2>: Cost 4 vmrghw <6,4,5,6>, <5,2,6,3>
+ 3719260674U, // <6,5,4,3>: Cost 4 vsldoi4 <4,6,5,4>, <3,4,5,6>
+ 3719261563U, // <6,5,4,4>: Cost 4 vsldoi4 <4,6,5,4>, <4,6,5,4>
+ 2702093622U, // <6,5,4,5>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 2309384706U, // <6,5,4,6>: Cost 3 vmrglw <4,5,6,4>, <3,4,5,6>
+ 3870085060U, // <6,5,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,4,7,4>
+ 2702093865U, // <6,5,4,u>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 3719266406U, // <6,5,5,0>: Cost 4 vsldoi4 <4,6,5,5>, LHS
+ 3789106889U, // <6,5,5,1>: Cost 4 vsldoi8 <5,1,6,5>, <5,1,6,5>
+ 3785789208U, // <6,5,5,2>: Cost 4 vsldoi8 <4,5,6,5>, <5,2,6,3>
+ 3373183950U, // <6,5,5,3>: Cost 4 vmrglw <2,u,6,5>, <6,u,5,3>
+ 2717355964U, // <6,5,5,4>: Cost 3 vsldoi8 <5,4,6,5>, <5,4,6,5>
+ 2791772164U, // <6,5,5,5>: Cost 3 vsldoi12 <6,6,6,6>, <5,5,5,5>
+ 2772455438U, // <6,5,5,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,5,6,6>
+ 3373183549U, // <6,5,5,7>: Cost 4 vmrglw <2,u,6,5>, <6,3,5,7>
+ 2720010496U, // <6,5,5,u>: Cost 3 vsldoi8 <5,u,6,5>, <5,u,6,5>
+ 2772455460U, // <6,5,6,0>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,0,1>
+ 2322008978U, // <6,5,6,1>: Cost 3 vmrglw <6,6,6,6>, <4,0,5,1>
+ 3840225335U, // <6,5,6,2>: Cost 4 vsldoi12 <2,4,5,6>, <5,6,2,2>
+ 2772455490U, // <6,5,6,3>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,3,4>
+ 2772455500U, // <6,5,6,4>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,4,5>
+ 2254901252U, // <6,5,6,5>: Cost 3 vmrghw <6,6,6,6>, <5,5,5,5>
+ 2772455520U, // <6,5,6,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,6,7>
+ 2785874024U, // <6,5,6,7>: Cost 3 vsldoi12 <5,6,7,6>, <5,6,7,6>
+ 2772455532U, // <6,5,6,u>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,u,1>
+ 2627625062U, // <6,5,7,0>: Cost 3 vsldoi4 <1,6,5,7>, LHS
+ 1235667858U, // <6,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309409278U, // <6,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309407659U, // <6,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627628342U, // <6,5,7,4>: Cost 3 vsldoi4 <1,6,5,7>, RHS
+ 1235668186U, // <6,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235667458U, // <6,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309407987U, // <6,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235667460U, // <6,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2627633254U, // <6,5,u,0>: Cost 3 vsldoi4 <1,6,5,u>, LHS
+ 1235676050U, // <6,5,u,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309417470U, // <6,5,u,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309415851U, // <6,5,u,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627636534U, // <6,5,u,4>: Cost 3 vsldoi4 <1,6,5,u>, RHS
+ 1235676378U, // <6,5,u,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235675650U, // <6,5,u,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309416179U, // <6,5,u,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235675652U, // <6,5,u,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2309352751U, // <6,6,0,0>: Cost 3 vmrglw <4,5,6,0>, <4,5,6,0>
+ 1650917478U, // <6,6,0,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 2250584570U, // <6,6,0,2>: Cost 3 vmrghw <6,0,2,1>, <6,2,7,3>
+ 3846197554U, // <6,6,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <6,0,3,1>
+ 2724659538U, // <6,6,0,4>: Cost 3 vsldoi8 <6,6,6,6>, <0,4,1,5>
+ 3725275225U, // <6,6,0,5>: Cost 4 vsldoi4 <5,6,6,0>, <5,6,6,0>
+ 2791772493U, // <6,6,0,6>: Cost 3 vsldoi12 <6,6,6,6>, <6,0,6,1>
+ 2309352758U, // <6,6,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 1650918045U, // <6,6,0,u>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 3325358368U, // <6,6,1,0>: Cost 4 vmrghw <6,1,7,1>, <6,0,1,1>
+ 2299406449U, // <6,6,1,1>: Cost 3 vmrglw <2,u,6,1>, <2,u,6,1>
+ 2724660118U, // <6,6,1,2>: Cost 3 vsldoi8 <6,6,6,6>, <1,2,3,0>
+ 3373148518U, // <6,6,1,3>: Cost 4 vmrglw <2,u,6,1>, <3,2,6,3>
+ 3834253712U, // <6,6,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <6,1,4,5>
+ 3373147953U, // <6,6,1,5>: Cost 4 vmrglw <2,u,6,1>, <2,4,6,5>
+ 2323297080U, // <6,6,1,6>: Cost 3 vmrglw <6,u,6,1>, <6,6,6,6>
+ 2299407670U, // <6,6,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2299407671U, // <6,6,1,u>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2252296489U, // <6,6,2,0>: Cost 3 vmrghw <6,2,7,3>, <6,0,2,1>
+ 3326038394U, // <6,6,2,1>: Cost 4 vmrghw <6,2,7,3>, <6,1,2,1>
+ 1178554874U, // <6,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724660902U, // <6,6,2,3>: Cost 3 vsldoi8 <6,6,6,6>, <2,3,0,1>
+ 2252296817U, // <6,6,2,4>: Cost 3 vmrghw <6,2,7,3>, <6,4,2,5>
+ 3840741864U, // <6,6,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <6,2,5,3>
+ 2252296976U, // <6,6,2,6>: Cost 3 vmrghw <6,2,7,3>, <6,6,2,2>
+ 2785874426U, // <6,6,2,7>: Cost 3 vsldoi12 <5,6,7,6>, <6,2,7,3>
+ 1178554874U, // <6,6,2,u>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724661398U, // <6,6,3,0>: Cost 3 vsldoi8 <6,6,6,6>, <3,0,1,2>
+ 3375154665U, // <6,6,3,1>: Cost 4 vmrglw <3,2,6,3>, <2,0,6,1>
+ 3375154909U, // <6,6,3,2>: Cost 4 vmrglw <3,2,6,3>, <2,3,6,2>
+ 2301413734U, // <6,6,3,3>: Cost 3 vmrglw <3,2,6,3>, <3,2,6,3>
+ 2772455986U, // <6,6,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <6,3,4,5>
+ 3375154993U, // <6,6,3,5>: Cost 4 vmrglw <3,2,6,3>, <2,4,6,5>
+ 2323313464U, // <6,6,3,6>: Cost 3 vmrglw <6,u,6,3>, <6,6,6,6>
+ 2301414710U, // <6,6,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2301414711U, // <6,6,3,u>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2724662162U, // <6,6,4,0>: Cost 3 vsldoi8 <6,6,6,6>, <4,0,5,1>
+ 3326939559U, // <6,6,4,1>: Cost 4 vmrghw <6,4,1,5>, <6,1,7,1>
+ 2253271546U, // <6,6,4,2>: Cost 3 vmrghw <6,4,2,5>, <6,2,7,3>
+ 3383127346U, // <6,6,4,3>: Cost 4 vmrglw <4,5,6,4>, <4,5,6,3>
+ 2309385523U, // <6,6,4,4>: Cost 3 vmrglw <4,5,6,4>, <4,5,6,4>
+ 1650920758U, // <6,6,4,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 2724662653U, // <6,6,4,6>: Cost 3 vsldoi8 <6,6,6,6>, <4,6,5,6>
+ 2309385526U, // <6,6,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 1650921001U, // <6,6,4,u>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 3725312102U, // <6,6,5,0>: Cost 4 vsldoi4 <5,6,6,5>, LHS
+ 3373180393U, // <6,6,5,1>: Cost 4 vmrglw <2,u,6,5>, <2,0,6,1>
+ 3791769368U, // <6,6,5,2>: Cost 4 vsldoi8 <5,5,6,6>, <5,2,6,3>
+ 3373181286U, // <6,6,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,6,3>
+ 3725315382U, // <6,6,5,4>: Cost 4 vsldoi4 <5,6,6,5>, RHS
+ 2299439221U, // <6,6,5,5>: Cost 3 vmrglw <2,u,6,5>, <2,u,6,5>
+ 2724663394U, // <6,6,5,6>: Cost 3 vsldoi8 <6,6,6,6>, <5,6,7,0>
+ 2299440438U, // <6,6,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 2299440439U, // <6,6,5,u>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1583808614U, // <6,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <6,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2254574074U, // <6,6,6,2>: Cost 3 vmrghw <6,6,2,2>, <6,2,7,3>
+ 2322010609U, // <6,6,6,3>: Cost 3 vmrglw <6,6,6,6>, <6,2,6,3>
+ 1583811894U, // <6,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2322010773U, // <6,6,6,5>: Cost 3 vmrglw <6,6,6,6>, <6,4,6,5>
+ 363253046U, // <6,6,6,6>: Cost 1 vspltisw2 RHS
+ 1248267574U, // <6,6,6,7>: Cost 2 vmrglw <6,6,6,6>, RHS
+ 363253046U, // <6,6,6,u>: Cost 1 vspltisw2 RHS
+ 2309410095U, // <6,6,7,0>: Cost 3 vmrglw RHS, <4,5,6,0>
+ 2309408233U, // <6,6,7,1>: Cost 3 vmrglw RHS, <2,0,6,1>
+ 2311402373U, // <6,6,7,2>: Cost 3 vmrglw RHS, <6,7,6,2>
+ 2309409126U, // <6,6,7,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 2309410099U, // <6,6,7,4>: Cost 3 vmrglw RHS, <4,5,6,4>
+ 2309408561U, // <6,6,7,5>: Cost 3 vmrglw RHS, <2,4,6,5>
+ 1237660472U, // <6,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 161926454U, // <6,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 161926455U, // <6,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 1583808614U, // <6,6,u,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1650923310U, // <6,6,u,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 1178554874U, // <6,6,u,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2309417318U, // <6,6,u,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 1583811894U, // <6,6,u,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1650923674U, // <6,6,u,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 363253046U, // <6,6,u,6>: Cost 1 vspltisw2 RHS
+ 161934646U, // <6,6,u,7>: Cost 1 vmrglw RHS, RHS
+ 161934647U, // <6,6,u,u>: Cost 1 vmrglw RHS, RHS
+ 1638318080U, // <6,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564576358U, // <6,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712060077U, // <6,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712060156U, // <6,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638318418U, // <6,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577865314U, // <6,7,0,5>: Cost 2 vsldoi4 <5,6,7,0>, <5,6,7,0>
+ 2712060406U, // <6,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2651608058U, // <6,7,0,7>: Cost 3 vsldoi4 <5,6,7,0>, <7,0,1,2>
+ 564576925U, // <6,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712060643U, // <6,7,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638318900U, // <6,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638318998U, // <6,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 3766559753U, // <6,7,1,3>: Cost 4 vsldoi8 <1,3,6,7>, <1,3,6,7>
+ 2712060971U, // <6,7,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712061039U, // <6,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712061135U, // <6,7,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 3373148612U, // <6,7,1,7>: Cost 4 vmrglw <2,u,6,1>, <3,3,7,7>
+ 1638319484U, // <6,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712061373U, // <6,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712061471U, // <6,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638319720U, // <6,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638319782U, // <6,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712061709U, // <6,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712061800U, // <6,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1638320058U, // <6,7,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252297836U, // <6,7,2,7>: Cost 3 vmrghw <6,2,7,3>, <7,7,7,7>
+ 1638320187U, // <6,7,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638320278U, // <6,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712062182U, // <6,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2712062256U, // <6,7,3,2>: Cost 3 vsldoi8 RHS, <3,2,0,3>
+ 1638320540U, // <6,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638320642U, // <6,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712062546U, // <6,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712062584U, // <6,7,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2712062659U, // <6,7,3,7>: Cost 3 vsldoi8 RHS, <3,7,0,1>
+ 1638320926U, // <6,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638321042U, // <6,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712062922U, // <6,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712063029U, // <6,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712063108U, // <6,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638321360U, // <6,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564579638U, // <6,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712063357U, // <6,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,6>
+ 2712063439U, // <6,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,7>
+ 564579881U, // <6,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712063560U, // <6,7,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714054287U, // <6,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712063742U, // <6,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 3373181295U, // <6,7,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,7,3>
+ 2712063924U, // <6,7,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638322180U, // <6,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638322274U, // <6,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 3373181380U, // <6,7,5,7>: Cost 4 vmrglw <2,u,6,5>, <3,3,7,7>
+ 1640313092U, // <6,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712064289U, // <6,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712064423U, // <6,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638322682U, // <6,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712064562U, // <6,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712064653U, // <6,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712064747U, // <6,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638323000U, // <6,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638323022U, // <6,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638323168U, // <6,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,7,3>
+ 1237659746U, // <6,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309411158U, // <6,7,7,1>: Cost 3 vmrglw RHS, <6,0,7,1>
+ 2639718330U, // <6,7,7,2>: Cost 3 vsldoi4 <3,6,7,7>, <2,6,3,7>
+ 1235669498U, // <6,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1237659750U, // <6,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309411243U, // <6,7,7,5>: Cost 3 vmrglw RHS, <6,1,7,5>
+ 1583895362U, // <6,7,7,6>: Cost 2 vsldoi4 <6,6,7,7>, <6,6,7,7>
+ 1235669826U, // <6,7,7,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 1235669503U, // <6,7,7,u>: Cost 2 vmrglw RHS, <6,2,7,u>
+ 1638323923U, // <6,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564582190U, // <6,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638324101U, // <6,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638324156U, // <6,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638324287U, // <6,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564582554U, // <6,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638324432U, // <6,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 1235678018U, // <6,7,u,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 564582757U, // <6,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 1638326272U, // <6,u,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564584550U, // <6,u,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712068269U, // <6,u,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2309349532U, // <6,u,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 1638326610U, // <6,u,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577939051U, // <6,u,0,5>: Cost 2 vsldoi4 <5,6,u,0>, <5,6,u,0>
+ 2712068598U, // <6,u,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2309352776U, // <6,u,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 564585117U, // <6,u,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712068835U, // <6,u,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638327092U, // <6,u,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1698715438U, // <6,u,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2299404444U, // <6,u,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2712069163U, // <6,u,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712069231U, // <6,u,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712069327U, // <6,u,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 2299407688U, // <6,u,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 1698715492U, // <6,u,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2712069565U, // <6,u,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 1178556206U, // <6,u,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 1638327912U, // <6,u,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638327974U, // <6,u,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712069901U, // <6,u,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 1178556570U, // <6,u,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 1638328250U, // <6,u,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252298496U, // <6,u,2,7>: Cost 3 vmrghw <6,2,7,3>, <u,7,0,1>
+ 1638328379U, // <6,u,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638328470U, // <6,u,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712070374U, // <6,u,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2704107883U, // <6,u,3,2>: Cost 3 vsldoi8 <3,2,6,u>, <3,2,6,u>
+ 1638328732U, // <6,u,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638328834U, // <6,u,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712070738U, // <6,u,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712070776U, // <6,u,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2301414728U, // <6,u,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 1638329118U, // <6,u,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638329234U, // <6,u,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712071114U, // <6,u,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712071221U, // <6,u,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2309382300U, // <6,u,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 1638329552U, // <6,u,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564587831U, // <6,u,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712071545U, // <6,u,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2309385544U, // <6,u,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 564588073U, // <6,u,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712071752U, // <6,u,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714062479U, // <6,u,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712071934U, // <6,u,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2299437212U, // <6,u,5,3>: Cost 3 vmrglw <2,u,6,5>, LHS
+ 2712072116U, // <6,u,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638330372U, // <6,u,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1698715802U, // <6,u,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2299440456U, // <6,u,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1698715820U, // <6,u,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 1583808614U, // <6,u,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1181161262U, // <6,u,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1638330874U, // <6,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1248264348U, // <6,u,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 1583811894U, // <6,u,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1181161626U, // <6,u,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 363253046U, // <6,u,6,6>: Cost 1 vspltisw2 RHS
+ 1638331214U, // <6,u,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 363253046U, // <6,u,6,u>: Cost 1 vspltisw2 RHS
+ 1560076390U, // <6,u,7,0>: Cost 2 vsldoi4 <2,6,u,7>, LHS
+ 1235664969U, // <6,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1560078311U, // <6,u,7,2>: Cost 2 vsldoi4 <2,6,u,7>, <2,6,u,7>
+ 161923228U, // <6,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 1560079670U, // <6,u,7,4>: Cost 2 vsldoi4 <2,6,u,7>, RHS
+ 1235665297U, // <6,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235667485U, // <6,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 161926472U, // <6,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 161923233U, // <6,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 1560084582U, // <6,u,u,0>: Cost 2 vsldoi4 <2,6,u,u>, LHS
+ 564590382U, // <6,u,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1560086504U, // <6,u,u,2>: Cost 2 vsldoi4 <2,6,u,u>, <2,6,u,u>
+ 161931420U, // <6,u,u,3>: Cost 1 vmrglw RHS, LHS
+ 1560087862U, // <6,u,u,4>: Cost 2 vsldoi4 <2,6,u,u>, RHS
+ 564590746U, // <6,u,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 363253046U, // <6,u,u,6>: Cost 1 vspltisw2 RHS
+ 161934664U, // <6,u,u,7>: Cost 1 vmrglw RHS, RHS
+ 161931425U, // <6,u,u,u>: Cost 1 vmrglw RHS, LHS
+ 1705426944U, // <7,0,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705426954U, // <7,0,0,1>: Cost 2 vsldoi12 RHS, <0,0,1,1>
+ 3713550266U, // <7,0,0,2>: Cost 4 vsldoi4 <3,7,0,0>, <2,6,3,7>
+ 2316063892U, // <7,0,0,3>: Cost 3 vmrglw <5,6,7,0>, <7,2,0,3>
+ 2779168805U, // <7,0,0,4>: Cost 3 vsldoi12 RHS, <0,0,4,1>
+ 2663698530U, // <7,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2657727309U, // <7,0,0,6>: Cost 3 vsldoi4 <6,7,0,0>, <6,7,0,0>
+ 2316064220U, // <7,0,0,7>: Cost 3 vmrglw <5,6,7,0>, <7,6,0,7>
+ 1705427017U, // <7,0,0,u>: Cost 2 vsldoi12 RHS, <0,0,u,1>
+ 1583988838U, // <7,0,1,0>: Cost 2 vsldoi4 <6,7,0,1>, LHS
+ 2779168859U, // <7,0,1,1>: Cost 3 vsldoi12 RHS, <0,1,1,1>
+ 631685222U, // <7,0,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639817411U, // <7,0,1,3>: Cost 3 vsldoi4 <3,7,0,1>, <3,7,0,1>
+ 1583992118U, // <7,0,1,4>: Cost 2 vsldoi4 <6,7,0,1>, RHS
+ 2657734660U, // <7,0,1,5>: Cost 3 vsldoi4 <6,7,0,1>, <5,5,5,5>
+ 1583993678U, // <7,0,1,6>: Cost 2 vsldoi4 <6,7,0,1>, <6,7,0,1>
+ 2657735672U, // <7,0,1,7>: Cost 3 vsldoi4 <6,7,0,1>, <7,0,1,0>
+ 631685276U, // <7,0,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779168933U, // <7,0,2,0>: Cost 3 vsldoi12 RHS, <0,2,0,3>
+ 2767667377U, // <7,0,2,1>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,1,6>
+ 2718713448U, // <7,0,2,2>: Cost 3 vsldoi8 <5,6,7,0>, <2,2,2,2>
+ 2718713510U, // <7,0,2,3>: Cost 3 vsldoi8 <5,6,7,0>, <2,3,0,1>
+ 3841409228U, // <7,0,2,4>: Cost 4 vsldoi12 <2,6,3,7>, <0,2,4,6>
+ 3852910802U, // <7,0,2,5>: Cost 4 vsldoi12 RHS, <0,2,5,3>
+ 2718713786U, // <7,0,2,6>: Cost 3 vsldoi8 <5,6,7,0>, <2,6,3,7>
+ 3847160036U, // <7,0,2,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,2,7,3>
+ 2767667440U, // <7,0,2,u>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,u,6>
+ 2718714006U, // <7,0,3,0>: Cost 3 vsldoi8 <5,6,7,0>, <3,0,1,2>
+ 2779169020U, // <7,0,3,1>: Cost 3 vsldoi12 RHS, <0,3,1,0>
+ 3852910853U, // <7,0,3,2>: Cost 4 vsldoi12 RHS, <0,3,2,0>
+ 2718714268U, // <7,0,3,3>: Cost 3 vsldoi8 <5,6,7,0>, <3,3,3,3>
+ 2718714370U, // <7,0,3,4>: Cost 3 vsldoi8 <5,6,7,0>, <3,4,5,6>
+ 2718714461U, // <7,0,3,5>: Cost 3 vsldoi8 <5,6,7,0>, <3,5,6,7>
+ 2706770608U, // <7,0,3,6>: Cost 3 vsldoi8 <3,6,7,0>, <3,6,7,0>
+ 3847160114U, // <7,0,3,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,3,7,0>
+ 2779169083U, // <7,0,3,u>: Cost 3 vsldoi12 RHS, <0,3,u,0>
+ 2718714770U, // <7,0,4,0>: Cost 3 vsldoi8 <5,6,7,0>, <4,0,5,1>
+ 1705427282U, // <7,0,4,1>: Cost 2 vsldoi12 RHS, <0,4,1,5>
+ 3713583034U, // <7,0,4,2>: Cost 4 vsldoi4 <3,7,0,4>, <2,6,3,7>
+ 3713583814U, // <7,0,4,3>: Cost 4 vsldoi4 <3,7,0,4>, <3,7,0,4>
+ 2779169133U, // <7,0,4,4>: Cost 3 vsldoi12 RHS, <0,4,4,5>
+ 1644973366U, // <7,0,4,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 2657760081U, // <7,0,4,6>: Cost 3 vsldoi4 <6,7,0,4>, <6,7,0,4>
+ 2259468868U, // <7,0,4,7>: Cost 3 vmrghw <7,4,5,6>, <0,7,1,4>
+ 1705427345U, // <7,0,4,u>: Cost 2 vsldoi12 RHS, <0,4,u,5>
+ 2718715508U, // <7,0,5,0>: Cost 3 vsldoi8 <5,6,7,0>, <5,0,6,1>
+ 2260123750U, // <7,0,5,1>: Cost 3 vmrghw <7,5,5,5>, LHS
+ 3792457451U, // <7,0,5,2>: Cost 4 vsldoi8 <5,6,7,0>, <5,2,1,3>
+ 3852911024U, // <7,0,5,3>: Cost 4 vsldoi12 RHS, <0,5,3,0>
+ 2718715836U, // <7,0,5,4>: Cost 3 vsldoi8 <5,6,7,0>, <5,4,6,5>
+ 2718715908U, // <7,0,5,5>: Cost 3 vsldoi8 <5,6,7,0>, <5,5,5,5>
+ 1644974178U, // <7,0,5,6>: Cost 2 vsldoi8 <5,6,7,0>, <5,6,7,0>
+ 3792457853U, // <7,0,5,7>: Cost 4 vsldoi8 <5,6,7,0>, <5,7,1,0>
+ 1646301444U, // <7,0,5,u>: Cost 2 vsldoi8 <5,u,7,0>, <5,u,7,0>
+ 2720706901U, // <7,0,6,0>: Cost 3 vsldoi8 <6,0,7,0>, <6,0,7,0>
+ 2779169270U, // <7,0,6,1>: Cost 3 vsldoi12 RHS, <0,6,1,7>
+ 2718716410U, // <7,0,6,2>: Cost 3 vsldoi8 <5,6,7,0>, <6,2,7,3>
+ 2722697800U, // <7,0,6,3>: Cost 3 vsldoi8 <6,3,7,0>, <6,3,7,0>
+ 3852911121U, // <7,0,6,4>: Cost 4 vsldoi12 RHS, <0,6,4,7>
+ 3852911130U, // <7,0,6,5>: Cost 4 vsldoi12 RHS, <0,6,5,7>
+ 2718716728U, // <7,0,6,6>: Cost 3 vsldoi8 <5,6,7,0>, <6,6,6,6>
+ 2718716750U, // <7,0,6,7>: Cost 3 vsldoi8 <5,6,7,0>, <6,7,0,1>
+ 2779169333U, // <7,0,6,u>: Cost 3 vsldoi12 RHS, <0,6,u,7>
+ 2718716922U, // <7,0,7,0>: Cost 3 vsldoi8 <5,6,7,0>, <7,0,1,2>
+ 1187872870U, // <7,0,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2718717076U, // <7,0,7,2>: Cost 3 vsldoi8 <5,6,7,0>, <7,2,0,3>
+ 3847160408U, // <7,0,7,3>: Cost 4 vsldoi12 <3,6,0,7>, <0,7,3,6>
+ 2718717286U, // <7,0,7,4>: Cost 3 vsldoi8 <5,6,7,0>, <7,4,5,6>
+ 2718717377U, // <7,0,7,5>: Cost 3 vsldoi8 <5,6,7,0>, <7,5,6,7>
+ 2718717404U, // <7,0,7,6>: Cost 3 vsldoi8 <5,6,7,0>, <7,6,0,7>
+ 2718717478U, // <7,0,7,7>: Cost 3 vsldoi8 <5,6,7,0>, <7,7,0,0>
+ 1187873437U, // <7,0,7,u>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 1584046182U, // <7,0,u,0>: Cost 2 vsldoi4 <6,7,0,u>, LHS
+ 1705427602U, // <7,0,u,1>: Cost 2 vsldoi12 RHS, <0,u,1,1>
+ 631685789U, // <7,0,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639874762U, // <7,0,u,3>: Cost 3 vsldoi4 <3,7,0,u>, <3,7,0,u>
+ 1584049462U, // <7,0,u,4>: Cost 2 vsldoi4 <6,7,0,u>, RHS
+ 1644976282U, // <7,0,u,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 1584051029U, // <7,0,u,6>: Cost 2 vsldoi4 <6,7,0,u>, <6,7,0,u>
+ 2718718208U, // <7,0,u,7>: Cost 3 vsldoi8 <5,6,7,0>, <u,7,0,1>
+ 631685843U, // <7,0,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 2721374218U, // <7,1,0,0>: Cost 3 vsldoi8 <6,1,7,1>, <0,0,1,1>
+ 2779169507U, // <7,1,0,1>: Cost 3 vsldoi12 RHS, <1,0,1,1>
+ 2779169516U, // <7,1,0,2>: Cost 3 vsldoi12 RHS, <1,0,2,1>
+ 3852911348U, // <7,1,0,3>: Cost 4 vsldoi12 RHS, <1,0,3,0>
+ 2669743414U, // <7,1,0,4>: Cost 3 vsldoi4 <u,7,1,0>, RHS
+ 2316058962U, // <7,1,0,5>: Cost 3 vmrglw <5,6,7,0>, <0,4,1,5>
+ 2316059044U, // <7,1,0,6>: Cost 3 vmrglw <5,6,7,0>, <0,5,1,6>
+ 2669745146U, // <7,1,0,7>: Cost 3 vsldoi4 <u,7,1,0>, <7,0,1,2>
+ 2779169570U, // <7,1,0,u>: Cost 3 vsldoi12 RHS, <1,0,u,1>
+ 2779169579U, // <7,1,1,0>: Cost 3 vsldoi12 RHS, <1,1,0,1>
+ 1705427764U, // <7,1,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169598U, // <7,1,1,2>: Cost 3 vsldoi12 RHS, <1,1,2,2>
+ 3713632972U, // <7,1,1,3>: Cost 4 vsldoi4 <3,7,1,1>, <3,7,1,1>
+ 2779169619U, // <7,1,1,4>: Cost 3 vsldoi12 RHS, <1,1,4,5>
+ 2779169628U, // <7,1,1,5>: Cost 3 vsldoi12 RHS, <1,1,5,5>
+ 2657809239U, // <7,1,1,6>: Cost 3 vsldoi4 <6,7,1,1>, <6,7,1,1>
+ 3835290474U, // <7,1,1,7>: Cost 4 vsldoi12 <1,6,1,7>, <1,1,7,1>
+ 1705427764U, // <7,1,1,u>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169660U, // <7,1,2,0>: Cost 3 vsldoi12 RHS, <1,2,0,1>
+ 2779169671U, // <7,1,2,1>: Cost 3 vsldoi12 RHS, <1,2,1,3>
+ 2779169680U, // <7,1,2,2>: Cost 3 vsldoi12 RHS, <1,2,2,3>
+ 1705427862U, // <7,1,2,3>: Cost 2 vsldoi12 RHS, <1,2,3,0>
+ 2779169700U, // <7,1,2,4>: Cost 3 vsldoi12 RHS, <1,2,4,5>
+ 2779169707U, // <7,1,2,5>: Cost 3 vsldoi12 RHS, <1,2,5,3>
+ 2657817432U, // <7,1,2,6>: Cost 3 vsldoi4 <6,7,1,2>, <6,7,1,2>
+ 2803057594U, // <7,1,2,7>: Cost 3 vsldoi12 RHS, <1,2,7,0>
+ 1705427907U, // <7,1,2,u>: Cost 2 vsldoi12 RHS, <1,2,u,0>
+ 3776538827U, // <7,1,3,0>: Cost 4 vsldoi8 <3,0,7,1>, <3,0,7,1>
+ 2319400970U, // <7,1,3,1>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,1>
+ 2316085398U, // <7,1,3,2>: Cost 3 vmrglw <5,6,7,3>, <3,0,1,2>
+ 3852911591U, // <7,1,3,3>: Cost 4 vsldoi12 RHS, <1,3,3,0>
+ 3852911600U, // <7,1,3,4>: Cost 4 vsldoi12 RHS, <1,3,4,0>
+ 2319401298U, // <7,1,3,5>: Cost 3 vmrglw <6,2,7,3>, <0,4,1,5>
+ 3833668617U, // <7,1,3,6>: Cost 4 vsldoi12 <1,3,6,7>, <1,3,6,7>
+ 3367265487U, // <7,1,3,7>: Cost 4 vmrglw <1,u,7,3>, <1,6,1,7>
+ 2319400977U, // <7,1,3,u>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,u>
+ 2724031378U, // <7,1,4,0>: Cost 3 vsldoi8 <6,5,7,1>, <4,0,5,1>
+ 2779169835U, // <7,1,4,1>: Cost 3 vsldoi12 RHS, <1,4,1,5>
+ 2779169844U, // <7,1,4,2>: Cost 3 vsldoi12 RHS, <1,4,2,5>
+ 3852911672U, // <7,1,4,3>: Cost 4 vsldoi12 RHS, <1,4,3,0>
+ 2669776182U, // <7,1,4,4>: Cost 3 vsldoi4 <u,7,1,4>, RHS
+ 2779169872U, // <7,1,4,5>: Cost 3 vsldoi12 RHS, <1,4,5,6>
+ 3835290712U, // <7,1,4,6>: Cost 4 vsldoi12 <1,6,1,7>, <1,4,6,5>
+ 2669778278U, // <7,1,4,7>: Cost 3 vsldoi4 <u,7,1,4>, <7,4,5,6>
+ 2779169898U, // <7,1,4,u>: Cost 3 vsldoi12 RHS, <1,4,u,5>
+ 2779169903U, // <7,1,5,0>: Cost 3 vsldoi12 RHS, <1,5,0,1>
+ 3835585661U, // <7,1,5,1>: Cost 4 vsldoi12 <1,6,5,7>, <1,5,1,6>
+ 3841410182U, // <7,1,5,2>: Cost 4 vsldoi12 <2,6,3,7>, <1,5,2,6>
+ 3852911753U, // <7,1,5,3>: Cost 4 vsldoi12 RHS, <1,5,3,0>
+ 2779169943U, // <7,1,5,4>: Cost 3 vsldoi12 RHS, <1,5,4,5>
+ 2318754130U, // <7,1,5,5>: Cost 3 vmrglw <6,1,7,5>, <0,4,1,5>
+ 2718724195U, // <7,1,5,6>: Cost 3 vsldoi8 <5,6,7,1>, <5,6,7,1>
+ 3859178670U, // <7,1,5,7>: Cost 4 vsldoi12 <5,6,1,7>, <1,5,7,1>
+ 2779169975U, // <7,1,5,u>: Cost 3 vsldoi12 RHS, <1,5,u,1>
+ 2720715094U, // <7,1,6,0>: Cost 3 vsldoi8 <6,0,7,1>, <6,0,7,1>
+ 2761549007U, // <7,1,6,1>: Cost 3 vsldoi12 <1,6,1,7>, <1,6,1,7>
+ 2779170008U, // <7,1,6,2>: Cost 3 vsldoi12 RHS, <1,6,2,7>
+ 3835438305U, // <7,1,6,3>: Cost 4 vsldoi12 <1,6,3,7>, <1,6,3,7>
+ 3835512042U, // <7,1,6,4>: Cost 4 vsldoi12 <1,6,4,7>, <1,6,4,7>
+ 2761843955U, // <7,1,6,5>: Cost 3 vsldoi12 <1,6,5,7>, <1,6,5,7>
+ 3835659516U, // <7,1,6,6>: Cost 4 vsldoi12 <1,6,6,7>, <1,6,6,7>
+ 2803057918U, // <7,1,6,7>: Cost 3 vsldoi12 RHS, <1,6,7,0>
+ 2762065166U, // <7,1,6,u>: Cost 3 vsldoi12 <1,6,u,7>, <1,6,u,7>
+ 2669797478U, // <7,1,7,0>: Cost 3 vsldoi4 <u,7,1,7>, LHS
+ 2322087946U, // <7,1,7,1>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,1>
+ 2317448186U, // <7,1,7,2>: Cost 3 vmrglw <5,u,7,7>, <7,0,1,2>
+ 3395829934U, // <7,1,7,3>: Cost 4 vmrglw <6,6,7,7>, <0,2,1,3>
+ 2669800758U, // <7,1,7,4>: Cost 3 vsldoi4 <u,7,1,7>, RHS
+ 2322088274U, // <7,1,7,5>: Cost 3 vmrglw <6,6,7,7>, <0,4,1,5>
+ 3375923377U, // <7,1,7,6>: Cost 4 vmrglw <3,3,7,7>, <0,2,1,6>
+ 2731996780U, // <7,1,7,7>: Cost 3 vsldoi8 <7,u,7,1>, <7,7,7,7>
+ 2322087953U, // <7,1,7,u>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,u>
+ 2779170146U, // <7,1,u,0>: Cost 3 vsldoi12 RHS, <1,u,0,1>
+ 1705427764U, // <7,1,u,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779170164U, // <7,1,u,2>: Cost 3 vsldoi12 RHS, <1,u,2,1>
+ 1705428348U, // <7,1,u,3>: Cost 2 vsldoi12 RHS, <1,u,3,0>
+ 2779170186U, // <7,1,u,4>: Cost 3 vsldoi12 RHS, <1,u,4,5>
+ 2763171221U, // <7,1,u,5>: Cost 3 vsldoi12 <1,u,5,7>, <1,u,5,7>
+ 2657866590U, // <7,1,u,6>: Cost 3 vsldoi4 <6,7,1,u>, <6,7,1,u>
+ 2803058080U, // <7,1,u,7>: Cost 3 vsldoi12 RHS, <1,u,7,0>
+ 1705428393U, // <7,1,u,u>: Cost 2 vsldoi12 RHS, <1,u,u,0>
+ 3713695846U, // <7,2,0,0>: Cost 4 vsldoi4 <3,7,2,0>, LHS
+ 2779170237U, // <7,2,0,1>: Cost 3 vsldoi12 RHS, <2,0,1,2>
+ 2779170245U, // <7,2,0,2>: Cost 3 vsldoi12 RHS, <2,0,2,1>
+ 1242316902U, // <7,2,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3713699126U, // <7,2,0,4>: Cost 4 vsldoi4 <3,7,2,0>, RHS
+ 3852912096U, // <7,2,0,5>: Cost 4 vsldoi12 RHS, <2,0,5,1>
+ 2767668713U, // <7,2,0,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,0,6,1>
+ 2256488426U, // <7,2,0,7>: Cost 3 vmrghw <7,0,1,2>, <2,7,0,1>
+ 1242316907U, // <7,2,0,u>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3852912132U, // <7,2,1,0>: Cost 4 vsldoi12 RHS, <2,1,0,1>
+ 3852912141U, // <7,2,1,1>: Cost 4 vsldoi12 RHS, <2,1,1,1>
+ 3852912149U, // <7,2,1,2>: Cost 4 vsldoi12 RHS, <2,1,2,0>
+ 2779170335U, // <7,2,1,3>: Cost 3 vsldoi12 RHS, <2,1,3,1>
+ 3852912172U, // <7,2,1,4>: Cost 4 vsldoi12 RHS, <2,1,4,5>
+ 3840747062U, // <7,2,1,5>: Cost 5 vsldoi12 <2,5,3,7>, <2,1,5,6>
+ 3841410617U, // <7,2,1,6>: Cost 4 vsldoi12 <2,6,3,7>, <2,1,6,0>
+ 3795125538U, // <7,2,1,7>: Cost 4 vsldoi8 <6,1,7,2>, <1,7,2,0>
+ 2779170380U, // <7,2,1,u>: Cost 3 vsldoi12 RHS, <2,1,u,1>
+ 2779170389U, // <7,2,2,0>: Cost 3 vsldoi12 RHS, <2,2,0,1>
+ 3852912222U, // <7,2,2,1>: Cost 4 vsldoi12 RHS, <2,2,1,1>
+ 1705428584U, // <7,2,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705428594U, // <7,2,2,3>: Cost 2 vsldoi12 RHS, <2,2,3,3>
+ 2779170429U, // <7,2,2,4>: Cost 3 vsldoi12 RHS, <2,2,4,5>
+ 3852912259U, // <7,2,2,5>: Cost 4 vsldoi12 RHS, <2,2,5,2>
+ 2767668880U, // <7,2,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,2,6,6>
+ 3841336981U, // <7,2,2,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,2,7,2>
+ 1705428639U, // <7,2,2,u>: Cost 2 vsldoi12 RHS, <2,2,u,3>
+ 1705428646U, // <7,2,3,0>: Cost 2 vsldoi12 RHS, <2,3,0,1>
+ 2779170479U, // <7,2,3,1>: Cost 3 vsldoi12 RHS, <2,3,1,1>
+ 2767668925U, // <7,2,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <2,3,2,6>
+ 1245659238U, // <7,2,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705428686U, // <7,2,3,4>: Cost 2 vsldoi12 RHS, <2,3,4,5>
+ 2779170519U, // <7,2,3,5>: Cost 3 vsldoi12 RHS, <2,3,5,5>
+ 2657899362U, // <7,2,3,6>: Cost 3 vsldoi4 <6,7,2,3>, <6,7,2,3>
+ 2319406574U, // <7,2,3,7>: Cost 3 vmrglw <6,2,7,3>, <7,6,2,7>
+ 1705428718U, // <7,2,3,u>: Cost 2 vsldoi12 RHS, <2,3,u,1>
+ 3713728614U, // <7,2,4,0>: Cost 4 vsldoi4 <3,7,2,4>, LHS
+ 3852912388U, // <7,2,4,1>: Cost 4 vsldoi12 RHS, <2,4,1,5>
+ 2779170573U, // <7,2,4,2>: Cost 3 vsldoi12 RHS, <2,4,2,5>
+ 1242349670U, // <7,2,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3713731894U, // <7,2,4,4>: Cost 4 vsldoi4 <3,7,2,4>, RHS
+ 2779170601U, // <7,2,4,5>: Cost 3 vsldoi12 RHS, <2,4,5,6>
+ 2767669041U, // <7,2,4,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,4,6,5>
+ 3389834456U, // <7,2,4,7>: Cost 4 vmrglw <5,6,7,4>, <1,6,2,7>
+ 1242349675U, // <7,2,4,u>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3852912456U, // <7,2,5,0>: Cost 4 vsldoi12 RHS, <2,5,0,1>
+ 3852912466U, // <7,2,5,1>: Cost 4 vsldoi12 RHS, <2,5,1,2>
+ 3852912475U, // <7,2,5,2>: Cost 4 vsldoi12 RHS, <2,5,2,2>
+ 2779170664U, // <7,2,5,3>: Cost 3 vsldoi12 RHS, <2,5,3,6>
+ 3852912496U, // <7,2,5,4>: Cost 4 vsldoi12 RHS, <2,5,4,5>
+ 3792474116U, // <7,2,5,5>: Cost 4 vsldoi8 <5,6,7,2>, <5,5,5,5>
+ 2718732388U, // <7,2,5,6>: Cost 3 vsldoi8 <5,6,7,2>, <5,6,7,2>
+ 3841337228U, // <7,2,5,7>: Cost 5 vsldoi12 <2,6,2,7>, <2,5,7,6>
+ 2779170709U, // <7,2,5,u>: Cost 3 vsldoi12 RHS, <2,5,u,6>
+ 2640003174U, // <7,2,6,0>: Cost 3 vsldoi4 <3,7,2,6>, LHS
+ 2721386920U, // <7,2,6,1>: Cost 3 vsldoi8 <6,1,7,2>, <6,1,7,2>
+ 2767595441U, // <7,2,6,2>: Cost 3 vsldoi12 <2,6,2,7>, <2,6,2,7>
+ 1693927354U, // <7,2,6,3>: Cost 2 vsldoi12 <2,6,3,7>, <2,6,3,7>
+ 2640006454U, // <7,2,6,4>: Cost 3 vsldoi4 <3,7,2,6>, RHS
+ 3841558476U, // <7,2,6,5>: Cost 4 vsldoi12 <2,6,5,7>, <2,6,5,7>
+ 2657923941U, // <7,2,6,6>: Cost 3 vsldoi4 <6,7,2,6>, <6,7,2,6>
+ 3841337310U, // <7,2,6,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,6,7,7>
+ 1694296039U, // <7,2,6,u>: Cost 2 vsldoi12 <2,6,u,7>, <2,6,u,7>
+ 2803058666U, // <7,2,7,0>: Cost 3 vsldoi12 RHS, <2,7,0,1>
+ 3852912632U, // <7,2,7,1>: Cost 4 vsldoi12 RHS, <2,7,1,6>
+ 2322089576U, // <7,2,7,2>: Cost 3 vmrglw <6,6,7,7>, <2,2,2,2>
+ 1248346214U, // <7,2,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 3841337362U, // <7,2,7,4>: Cost 4 vsldoi12 <2,6,2,7>, <2,7,4,5>
+ 3395830836U, // <7,2,7,5>: Cost 4 vmrglw <6,6,7,7>, <1,4,2,5>
+ 2261616570U, // <7,2,7,6>: Cost 3 vmrghw <7,7,7,7>, <2,6,3,7>
+ 3371943857U, // <7,2,7,7>: Cost 4 vmrglw <2,6,7,7>, <2,6,2,7>
+ 1248346219U, // <7,2,7,u>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705429051U, // <7,2,u,0>: Cost 2 vsldoi12 RHS, <2,u,0,1>
+ 2779170884U, // <7,2,u,1>: Cost 3 vsldoi12 RHS, <2,u,1,1>
+ 1705428584U, // <7,2,u,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1695254620U, // <7,2,u,3>: Cost 2 vsldoi12 <2,u,3,7>, <2,u,3,7>
+ 1705429091U, // <7,2,u,4>: Cost 2 vsldoi12 RHS, <2,u,4,5>
+ 2779170924U, // <7,2,u,5>: Cost 3 vsldoi12 RHS, <2,u,5,5>
+ 2767669361U, // <7,2,u,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,u,6,1>
+ 2803058809U, // <7,2,u,7>: Cost 3 vsldoi12 RHS, <2,u,7,0>
+ 1695623305U, // <7,2,u,u>: Cost 2 vsldoi12 <2,u,u,7>, <2,u,u,7>
+ 2779170955U, // <7,3,0,0>: Cost 3 vsldoi12 RHS, <3,0,0,0>
+ 1705429142U, // <7,3,0,1>: Cost 2 vsldoi12 RHS, <3,0,1,2>
+ 2634057732U, // <7,3,0,2>: Cost 3 vsldoi4 <2,7,3,0>, <2,7,3,0>
+ 2779170983U, // <7,3,0,3>: Cost 3 vsldoi12 RHS, <3,0,3,1>
+ 2779170992U, // <7,3,0,4>: Cost 3 vsldoi12 RHS, <3,0,4,1>
+ 3852912829U, // <7,3,0,5>: Cost 4 vsldoi12 RHS, <3,0,5,5>
+ 2657948520U, // <7,3,0,6>: Cost 3 vsldoi4 <6,7,3,0>, <6,7,3,0>
+ 2316060602U, // <7,3,0,7>: Cost 3 vmrglw <5,6,7,0>, <2,6,3,7>
+ 1705429205U, // <7,3,0,u>: Cost 2 vsldoi12 RHS, <3,0,u,2>
+ 3852912860U, // <7,3,1,0>: Cost 4 vsldoi12 RHS, <3,1,0,0>
+ 2779171046U, // <7,3,1,1>: Cost 3 vsldoi12 RHS, <3,1,1,1>
+ 2779171057U, // <7,3,1,2>: Cost 3 vsldoi12 RHS, <3,1,2,3>
+ 3852912887U, // <7,3,1,3>: Cost 4 vsldoi12 RHS, <3,1,3,0>
+ 3852912896U, // <7,3,1,4>: Cost 4 vsldoi12 RHS, <3,1,4,0>
+ 3852912905U, // <7,3,1,5>: Cost 4 vsldoi12 RHS, <3,1,5,0>
+ 3835291923U, // <7,3,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <3,1,6,1>
+ 3841411356U, // <7,3,1,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,1,7,1>
+ 2779171111U, // <7,3,1,u>: Cost 3 vsldoi12 RHS, <3,1,u,3>
+ 2779171120U, // <7,3,2,0>: Cost 3 vsldoi12 RHS, <3,2,0,3>
+ 3852912952U, // <7,3,2,1>: Cost 4 vsldoi12 RHS, <3,2,1,2>
+ 2779171137U, // <7,3,2,2>: Cost 3 vsldoi12 RHS, <3,2,2,2>
+ 2779171144U, // <7,3,2,3>: Cost 3 vsldoi12 RHS, <3,2,3,0>
+ 2779171156U, // <7,3,2,4>: Cost 3 vsldoi12 RHS, <3,2,4,3>
+ 3852912989U, // <7,3,2,5>: Cost 4 vsldoi12 RHS, <3,2,5,3>
+ 2767669606U, // <7,3,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,6,3>
+ 2767669615U, // <7,3,2,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,7,3>
+ 2779171189U, // <7,3,2,u>: Cost 3 vsldoi12 RHS, <3,2,u,0>
+ 2779171198U, // <7,3,3,0>: Cost 3 vsldoi12 RHS, <3,3,0,0>
+ 3852913032U, // <7,3,3,1>: Cost 4 vsldoi12 RHS, <3,3,1,1>
+ 2704140655U, // <7,3,3,2>: Cost 3 vsldoi8 <3,2,7,3>, <3,2,7,3>
+ 1705429404U, // <7,3,3,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171238U, // <7,3,3,4>: Cost 3 vsldoi12 RHS, <3,3,4,4>
+ 3852913070U, // <7,3,3,5>: Cost 4 vsldoi12 RHS, <3,3,5,3>
+ 2657973099U, // <7,3,3,6>: Cost 3 vsldoi4 <6,7,3,3>, <6,7,3,3>
+ 2767669700U, // <7,3,3,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,3,7,7>
+ 1705429404U, // <7,3,3,u>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171280U, // <7,3,4,0>: Cost 3 vsldoi12 RHS, <3,4,0,1>
+ 2779171290U, // <7,3,4,1>: Cost 3 vsldoi12 RHS, <3,4,1,2>
+ 2634090504U, // <7,3,4,2>: Cost 3 vsldoi4 <2,7,3,4>, <2,7,3,4>
+ 2779171311U, // <7,3,4,3>: Cost 3 vsldoi12 RHS, <3,4,3,5>
+ 2779171319U, // <7,3,4,4>: Cost 3 vsldoi12 RHS, <3,4,4,4>
+ 1705429506U, // <7,3,4,5>: Cost 2 vsldoi12 RHS, <3,4,5,6>
+ 2722057593U, // <7,3,4,6>: Cost 3 vsldoi8 <6,2,7,3>, <4,6,5,2>
+ 2316093370U, // <7,3,4,7>: Cost 3 vmrglw <5,6,7,4>, <2,6,3,7>
+ 1705429533U, // <7,3,4,u>: Cost 2 vsldoi12 RHS, <3,4,u,6>
+ 3852913185U, // <7,3,5,0>: Cost 4 vsldoi12 RHS, <3,5,0,1>
+ 3795799695U, // <7,3,5,1>: Cost 4 vsldoi8 <6,2,7,3>, <5,1,0,1>
+ 3852913203U, // <7,3,5,2>: Cost 4 vsldoi12 RHS, <3,5,2,1>
+ 3852913214U, // <7,3,5,3>: Cost 4 vsldoi12 RHS, <3,5,3,3>
+ 3852913225U, // <7,3,5,4>: Cost 4 vsldoi12 RHS, <3,5,4,5>
+ 2779171410U, // <7,3,5,5>: Cost 3 vsldoi12 RHS, <3,5,5,5>
+ 2718740581U, // <7,3,5,6>: Cost 3 vsldoi8 <5,6,7,3>, <5,6,7,3>
+ 3841411685U, // <7,3,5,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,5,7,6>
+ 2720067847U, // <7,3,5,u>: Cost 3 vsldoi8 <5,u,7,3>, <5,u,7,3>
+ 2773420664U, // <7,3,6,0>: Cost 3 vsldoi12 <3,6,0,7>, <3,6,0,7>
+ 3847236225U, // <7,3,6,1>: Cost 4 vsldoi12 <3,6,1,7>, <3,6,1,7>
+ 1648316922U, // <7,3,6,2>: Cost 2 vsldoi8 <6,2,7,3>, <6,2,7,3>
+ 2773641875U, // <7,3,6,3>: Cost 3 vsldoi12 <3,6,3,7>, <3,6,3,7>
+ 2773715612U, // <7,3,6,4>: Cost 3 vsldoi12 <3,6,4,7>, <3,6,4,7>
+ 3847531173U, // <7,3,6,5>: Cost 4 vsldoi12 <3,6,5,7>, <3,6,5,7>
+ 2722059024U, // <7,3,6,6>: Cost 3 vsldoi8 <6,2,7,3>, <6,6,2,2>
+ 2767669943U, // <7,3,6,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,6,7,7>
+ 1652298720U, // <7,3,6,u>: Cost 2 vsldoi8 <6,u,7,3>, <6,u,7,3>
+ 2767669955U, // <7,3,7,0>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,0,1>
+ 3841411788U, // <7,3,7,1>: Cost 4 vsldoi12 <2,6,3,7>, <3,7,1,1>
+ 2767669978U, // <7,3,7,2>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,2,6>
+ 2722059546U, // <7,3,7,3>: Cost 3 vsldoi8 <6,2,7,3>, <7,3,6,2>
+ 2767669995U, // <7,3,7,4>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,4,5>
+ 3852913396U, // <7,3,7,5>: Cost 4 vsldoi12 RHS, <3,7,5,5>
+ 2722059758U, // <7,3,7,6>: Cost 3 vsldoi8 <6,2,7,3>, <7,6,2,7>
+ 2302183354U, // <7,3,7,7>: Cost 3 vmrglw <3,3,7,7>, <2,6,3,7>
+ 2767670027U, // <7,3,7,u>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,u,1>
+ 2774747930U, // <7,3,u,0>: Cost 3 vsldoi12 <3,u,0,7>, <3,u,0,7>
+ 1705429790U, // <7,3,u,1>: Cost 2 vsldoi12 RHS, <3,u,1,2>
+ 1660262316U, // <7,3,u,2>: Cost 2 vsldoi8 <u,2,7,3>, <u,2,7,3>
+ 1705429404U, // <7,3,u,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2775042878U, // <7,3,u,4>: Cost 3 vsldoi12 <3,u,4,7>, <3,u,4,7>
+ 1705429830U, // <7,3,u,5>: Cost 2 vsldoi12 RHS, <3,u,5,6>
+ 2779171660U, // <7,3,u,6>: Cost 3 vsldoi12 RHS, <3,u,6,3>
+ 2767670101U, // <7,3,u,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,u,7,3>
+ 1705429853U, // <7,3,u,u>: Cost 2 vsldoi12 RHS, <3,u,u,2>
+ 2718744576U, // <7,4,0,0>: Cost 3 vsldoi8 <5,6,7,4>, <0,0,0,0>
+ 1645002854U, // <7,4,0,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 3852913527U, // <7,4,0,2>: Cost 4 vsldoi12 RHS, <4,0,2,1>
+ 3852913536U, // <7,4,0,3>: Cost 4 vsldoi12 RHS, <4,0,3,1>
+ 2316061904U, // <7,4,0,4>: Cost 3 vmrglw <5,6,7,0>, <4,4,4,4>
+ 1705429906U, // <7,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658022257U, // <7,4,0,6>: Cost 3 vsldoi4 <6,7,4,0>, <6,7,4,0>
+ 2256489928U, // <7,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1707420589U, // <7,4,0,u>: Cost 2 vsldoi12 RHS, <4,0,u,1>
+ 3852913590U, // <7,4,1,0>: Cost 4 vsldoi12 RHS, <4,1,0,1>
+ 2718745396U, // <7,4,1,1>: Cost 3 vsldoi8 <5,6,7,4>, <1,1,1,1>
+ 2779171786U, // <7,4,1,2>: Cost 3 vsldoi12 RHS, <4,1,2,3>
+ 3852913616U, // <7,4,1,3>: Cost 4 vsldoi12 RHS, <4,1,3,0>
+ 3852913627U, // <7,4,1,4>: Cost 4 vsldoi12 RHS, <4,1,4,2>
+ 2779171810U, // <7,4,1,5>: Cost 3 vsldoi12 RHS, <4,1,5,0>
+ 3792487631U, // <7,4,1,6>: Cost 4 vsldoi8 <5,6,7,4>, <1,6,1,7>
+ 3394456220U, // <7,4,1,7>: Cost 4 vmrglw <6,4,7,1>, <3,6,4,7>
+ 2779171837U, // <7,4,1,u>: Cost 3 vsldoi12 RHS, <4,1,u,0>
+ 3852913673U, // <7,4,2,0>: Cost 4 vsldoi12 RHS, <4,2,0,3>
+ 3852913682U, // <7,4,2,1>: Cost 4 vsldoi12 RHS, <4,2,1,3>
+ 2718746216U, // <7,4,2,2>: Cost 3 vsldoi8 <5,6,7,4>, <2,2,2,2>
+ 2718746278U, // <7,4,2,3>: Cost 3 vsldoi8 <5,6,7,4>, <2,3,0,1>
+ 2779171885U, // <7,4,2,4>: Cost 3 vsldoi12 RHS, <4,2,4,3>
+ 2779171893U, // <7,4,2,5>: Cost 3 vsldoi12 RHS, <4,2,5,2>
+ 2718746554U, // <7,4,2,6>: Cost 3 vsldoi8 <5,6,7,4>, <2,6,3,7>
+ 3847457864U, // <7,4,2,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,2,7,3>
+ 2779171921U, // <7,4,2,u>: Cost 3 vsldoi12 RHS, <4,2,u,3>
+ 2718746774U, // <7,4,3,0>: Cost 3 vsldoi8 <5,6,7,4>, <3,0,1,2>
+ 3852913762U, // <7,4,3,1>: Cost 4 vsldoi12 RHS, <4,3,1,2>
+ 3852913772U, // <7,4,3,2>: Cost 4 vsldoi12 RHS, <4,3,2,3>
+ 2718747036U, // <7,4,3,3>: Cost 3 vsldoi8 <5,6,7,4>, <3,3,3,3>
+ 2718747138U, // <7,4,3,4>: Cost 3 vsldoi8 <5,6,7,4>, <3,4,5,6>
+ 2779171972U, // <7,4,3,5>: Cost 3 vsldoi12 RHS, <4,3,5,0>
+ 2706803380U, // <7,4,3,6>: Cost 3 vsldoi8 <3,6,7,4>, <3,6,7,4>
+ 3847457946U, // <7,4,3,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,3,7,4>
+ 2781162655U, // <7,4,3,u>: Cost 3 vsldoi12 RHS, <4,3,u,0>
+ 2718747538U, // <7,4,4,0>: Cost 3 vsldoi8 <5,6,7,4>, <4,0,5,1>
+ 3852913842U, // <7,4,4,1>: Cost 4 vsldoi12 RHS, <4,4,1,1>
+ 3852913852U, // <7,4,4,2>: Cost 4 vsldoi12 RHS, <4,4,2,2>
+ 2316096696U, // <7,4,4,3>: Cost 3 vmrglw <5,6,7,4>, <7,2,4,3>
+ 1705430224U, // <7,4,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705430234U, // <7,4,4,5>: Cost 2 vsldoi12 RHS, <4,4,5,5>
+ 2658055029U, // <7,4,4,6>: Cost 3 vsldoi4 <6,7,4,4>, <6,7,4,4>
+ 2316097024U, // <7,4,4,7>: Cost 3 vmrglw <5,6,7,4>, <7,6,4,7>
+ 1707420917U, // <7,4,4,u>: Cost 2 vsldoi12 RHS, <4,4,u,5>
+ 1584316518U, // <7,4,5,0>: Cost 2 vsldoi4 <6,7,4,5>, LHS
+ 2658059060U, // <7,4,5,1>: Cost 3 vsldoi4 <6,7,4,5>, <1,1,1,1>
+ 2640144314U, // <7,4,5,2>: Cost 3 vsldoi4 <3,7,4,5>, <2,6,3,7>
+ 2640145131U, // <7,4,5,3>: Cost 3 vsldoi4 <3,7,4,5>, <3,7,4,5>
+ 1584319798U, // <7,4,5,4>: Cost 2 vsldoi4 <6,7,4,5>, RHS
+ 2779172134U, // <7,4,5,5>: Cost 3 vsldoi12 RHS, <4,5,5,0>
+ 631688502U, // <7,4,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2658063354U, // <7,4,5,7>: Cost 3 vsldoi4 <6,7,4,5>, <7,0,1,2>
+ 631688520U, // <7,4,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 3852914001U, // <7,4,6,0>: Cost 4 vsldoi12 RHS, <4,6,0,7>
+ 3852914010U, // <7,4,6,1>: Cost 4 vsldoi12 RHS, <4,6,1,7>
+ 2718749178U, // <7,4,6,2>: Cost 3 vsldoi8 <5,6,7,4>, <6,2,7,3>
+ 2722730572U, // <7,4,6,3>: Cost 3 vsldoi8 <6,3,7,4>, <6,3,7,4>
+ 2723394205U, // <7,4,6,4>: Cost 3 vsldoi8 <6,4,7,4>, <6,4,7,4>
+ 2779172221U, // <7,4,6,5>: Cost 3 vsldoi12 RHS, <4,6,5,6>
+ 2718749496U, // <7,4,6,6>: Cost 3 vsldoi8 <5,6,7,4>, <6,6,6,6>
+ 2718749518U, // <7,4,6,7>: Cost 3 vsldoi8 <5,6,7,4>, <6,7,0,1>
+ 2779172249U, // <7,4,6,u>: Cost 3 vsldoi12 RHS, <4,6,u,7>
+ 2718749690U, // <7,4,7,0>: Cost 3 vsldoi8 <5,6,7,4>, <7,0,1,2>
+ 3847458214U, // <7,4,7,1>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,1,2>
+ 2718749880U, // <7,4,7,2>: Cost 3 vsldoi8 <5,6,7,4>, <7,2,4,3>
+ 3847458236U, // <7,4,7,3>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,3,6>
+ 2718750004U, // <7,4,7,4>: Cost 3 vsldoi8 <5,6,7,4>, <7,4,0,1>
+ 1187876150U, // <7,4,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2718750208U, // <7,4,7,6>: Cost 3 vsldoi8 <5,6,7,4>, <7,6,4,7>
+ 2718750286U, // <7,4,7,7>: Cost 3 vsldoi8 <5,6,7,4>, <7,7,4,4>
+ 1187876393U, // <7,4,7,u>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 1584341094U, // <7,4,u,0>: Cost 2 vsldoi4 <6,7,4,u>, LHS
+ 1645008686U, // <7,4,u,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 2640168890U, // <7,4,u,2>: Cost 3 vsldoi4 <3,7,4,u>, <2,6,3,7>
+ 2640169710U, // <7,4,u,3>: Cost 3 vsldoi4 <3,7,4,u>, <3,7,4,u>
+ 1584344374U, // <7,4,u,4>: Cost 2 vsldoi4 <6,7,4,u>, RHS
+ 1705430554U, // <7,4,u,5>: Cost 2 vsldoi12 RHS, <4,u,5,1>
+ 631688745U, // <7,4,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 2718750976U, // <7,4,u,7>: Cost 3 vsldoi8 <5,6,7,4>, <u,7,0,1>
+ 631688763U, // <7,4,u,u>: Cost 1 vsldoi12 RHS, RHS
+ 2646147174U, // <7,5,0,0>: Cost 3 vsldoi4 <4,7,5,0>, LHS
+ 2779172424U, // <7,5,0,1>: Cost 3 vsldoi12 RHS, <5,0,1,2>
+ 3852914258U, // <7,5,0,2>: Cost 4 vsldoi12 RHS, <5,0,2,3>
+ 3852914268U, // <7,5,0,3>: Cost 4 vsldoi12 RHS, <5,0,3,4>
+ 2779172450U, // <7,5,0,4>: Cost 3 vsldoi12 RHS, <5,0,4,1>
+ 2316061914U, // <7,5,0,5>: Cost 3 vmrglw <5,6,7,0>, <4,4,5,5>
+ 2316061186U, // <7,5,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,5,6>
+ 2646152186U, // <7,5,0,7>: Cost 3 vsldoi4 <4,7,5,0>, <7,0,1,2>
+ 2779172486U, // <7,5,0,u>: Cost 3 vsldoi12 RHS, <5,0,u,1>
+ 2781163151U, // <7,5,1,0>: Cost 3 vsldoi12 RHS, <5,1,0,1>
+ 2321378194U, // <7,5,1,1>: Cost 3 vmrglw <6,5,7,1>, <4,0,5,1>
+ 3852914339U, // <7,5,1,2>: Cost 4 vsldoi12 RHS, <5,1,2,3>
+ 3852914350U, // <7,5,1,3>: Cost 4 vsldoi12 RHS, <5,1,3,5>
+ 2781163191U, // <7,5,1,4>: Cost 3 vsldoi12 RHS, <5,1,4,5>
+ 3852914363U, // <7,5,1,5>: Cost 4 vsldoi12 RHS, <5,1,5,0>
+ 3835588297U, // <7,5,1,6>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,6,5>
+ 3835588306U, // <7,5,1,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,7,5>
+ 2781163223U, // <7,5,1,u>: Cost 3 vsldoi12 RHS, <5,1,u,1>
+ 3852914400U, // <7,5,2,0>: Cost 4 vsldoi12 RHS, <5,2,0,1>
+ 2781163243U, // <7,5,2,1>: Cost 3 vsldoi12 RHS, <5,2,1,3>
+ 3852914419U, // <7,5,2,2>: Cost 4 vsldoi12 RHS, <5,2,2,2>
+ 2779172606U, // <7,5,2,3>: Cost 3 vsldoi12 RHS, <5,2,3,4>
+ 3780552497U, // <7,5,2,4>: Cost 4 vsldoi8 <3,6,7,5>, <2,4,6,5>
+ 2781163279U, // <7,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2779172632U, // <7,5,2,6>: Cost 3 vsldoi12 RHS, <5,2,6,3>
+ 3835588385U, // <7,5,2,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,2,7,3>
+ 2779172650U, // <7,5,2,u>: Cost 3 vsldoi12 RHS, <5,2,u,3>
+ 3852914481U, // <7,5,3,0>: Cost 4 vsldoi12 RHS, <5,3,0,1>
+ 2319403922U, // <7,5,3,1>: Cost 3 vmrglw <6,2,7,3>, <4,0,5,1>
+ 2319404409U, // <7,5,3,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 3852914510U, // <7,5,3,3>: Cost 4 vsldoi12 RHS, <5,3,3,3>
+ 3779226131U, // <7,5,3,4>: Cost 4 vsldoi8 <3,4,7,5>, <3,4,7,5>
+ 2319404250U, // <7,5,3,5>: Cost 3 vmrglw <6,2,7,3>, <4,4,5,5>
+ 2319403522U, // <7,5,3,6>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,6>
+ 3852914547U, // <7,5,3,7>: Cost 4 vsldoi12 RHS, <5,3,7,4>
+ 2319403524U, // <7,5,3,u>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,u>
+ 2646179942U, // <7,5,4,0>: Cost 3 vsldoi4 <4,7,5,4>, LHS
+ 2316094354U, // <7,5,4,1>: Cost 3 vmrglw <5,6,7,4>, <4,0,5,1>
+ 3852914582U, // <7,5,4,2>: Cost 4 vsldoi12 RHS, <5,4,2,3>
+ 3852914592U, // <7,5,4,3>: Cost 4 vsldoi12 RHS, <5,4,3,4>
+ 2646183372U, // <7,5,4,4>: Cost 3 vsldoi4 <4,7,5,4>, <4,7,5,4>
+ 2779172788U, // <7,5,4,5>: Cost 3 vsldoi12 RHS, <5,4,5,6>
+ 2316093954U, // <7,5,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,5,6>
+ 2646185318U, // <7,5,4,7>: Cost 3 vsldoi4 <4,7,5,4>, <7,4,5,6>
+ 2779172815U, // <7,5,4,u>: Cost 3 vsldoi12 RHS, <5,4,u,6>
+ 2781163475U, // <7,5,5,0>: Cost 3 vsldoi12 RHS, <5,5,0,1>
+ 2781163484U, // <7,5,5,1>: Cost 3 vsldoi12 RHS, <5,5,1,1>
+ 3852914662U, // <7,5,5,2>: Cost 4 vsldoi12 RHS, <5,5,2,2>
+ 3852914672U, // <7,5,5,3>: Cost 4 vsldoi12 RHS, <5,5,3,3>
+ 2781163515U, // <7,5,5,4>: Cost 3 vsldoi12 RHS, <5,5,4,5>
+ 1705431044U, // <7,5,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172878U, // <7,5,5,6>: Cost 3 vsldoi12 RHS, <5,5,6,6>
+ 3835588632U, // <7,5,5,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,5,7,7>
+ 1705431044U, // <7,5,5,u>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172900U, // <7,5,6,0>: Cost 3 vsldoi12 RHS, <5,6,0,1>
+ 2781163571U, // <7,5,6,1>: Cost 3 vsldoi12 RHS, <5,6,1,7>
+ 3852914743U, // <7,5,6,2>: Cost 4 vsldoi12 RHS, <5,6,2,2>
+ 2779172930U, // <7,5,6,3>: Cost 3 vsldoi12 RHS, <5,6,3,4>
+ 2779172940U, // <7,5,6,4>: Cost 3 vsldoi12 RHS, <5,6,4,5>
+ 2781163607U, // <7,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 2779172960U, // <7,5,6,6>: Cost 3 vsldoi12 RHS, <5,6,6,7>
+ 1705431138U, // <7,5,6,7>: Cost 2 vsldoi12 RHS, <5,6,7,0>
+ 1705578603U, // <7,5,6,u>: Cost 2 vsldoi12 RHS, <5,6,u,0>
+ 2646204518U, // <7,5,7,0>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2322090898U, // <7,5,7,1>: Cost 3 vmrglw <6,6,7,7>, <4,0,5,1>
+ 3719947880U, // <7,5,7,2>: Cost 4 vsldoi4 <4,7,5,7>, <2,2,2,2>
+ 3719948438U, // <7,5,7,3>: Cost 4 vsldoi4 <4,7,5,7>, <3,0,1,2>
+ 2646207951U, // <7,5,7,4>: Cost 3 vsldoi4 <4,7,5,7>, <4,7,5,7>
+ 2322091226U, // <7,5,7,5>: Cost 3 vmrglw <6,6,7,7>, <4,4,5,5>
+ 2322090498U, // <7,5,7,6>: Cost 3 vmrglw <6,6,7,7>, <3,4,5,6>
+ 2646210156U, // <7,5,7,7>: Cost 3 vsldoi4 <4,7,5,7>, <7,7,7,7>
+ 2646210350U, // <7,5,7,u>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2779173062U, // <7,5,u,0>: Cost 3 vsldoi12 RHS, <5,u,0,1>
+ 2779173072U, // <7,5,u,1>: Cost 3 vsldoi12 RHS, <5,u,1,2>
+ 2319404409U, // <7,5,u,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 2779173092U, // <7,5,u,3>: Cost 3 vsldoi12 RHS, <5,u,3,4>
+ 2779173101U, // <7,5,u,4>: Cost 3 vsldoi12 RHS, <5,u,4,4>
+ 1705431044U, // <7,5,u,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779173118U, // <7,5,u,6>: Cost 3 vsldoi12 RHS, <5,u,6,3>
+ 1705578756U, // <7,5,u,7>: Cost 2 vsldoi12 RHS, <5,u,7,0>
+ 1707421965U, // <7,5,u,u>: Cost 2 vsldoi12 RHS, <5,u,u,0>
+ 3852914966U, // <7,6,0,0>: Cost 4 vsldoi12 RHS, <6,0,0,0>
+ 2779173153U, // <7,6,0,1>: Cost 3 vsldoi12 RHS, <6,0,1,2>
+ 2256491002U, // <7,6,0,2>: Cost 3 vmrghw <7,0,1,2>, <6,2,7,3>
+ 3852914994U, // <7,6,0,3>: Cost 4 vsldoi12 RHS, <6,0,3,1>
+ 3852915003U, // <7,6,0,4>: Cost 4 vsldoi12 RHS, <6,0,4,1>
+ 2316062652U, // <7,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316063544U, // <7,6,0,6>: Cost 3 vmrglw <5,6,7,0>, <6,6,6,6>
+ 1242320182U, // <7,6,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1242320183U, // <7,6,0,u>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 3852915048U, // <7,6,1,0>: Cost 4 vsldoi12 RHS, <6,1,0,1>
+ 3377866217U, // <7,6,1,1>: Cost 4 vmrglw <3,6,7,1>, <2,0,6,1>
+ 3852915068U, // <7,6,1,2>: Cost 4 vsldoi12 RHS, <6,1,2,3>
+ 3833672072U, // <7,6,1,3>: Cost 5 vsldoi12 <1,3,6,7>, <6,1,3,6>
+ 3852915088U, // <7,6,1,4>: Cost 4 vsldoi12 RHS, <6,1,4,5>
+ 3395122056U, // <7,6,1,5>: Cost 4 vmrglw <6,5,7,1>, <6,7,6,5>
+ 3389813560U, // <7,6,1,6>: Cost 4 vmrglw <5,6,7,1>, <6,6,6,6>
+ 2779173287U, // <7,6,1,7>: Cost 3 vsldoi12 RHS, <6,1,7,1>
+ 2779320752U, // <7,6,1,u>: Cost 3 vsldoi12 RHS, <6,1,u,1>
+ 2658181222U, // <7,6,2,0>: Cost 3 vsldoi4 <6,7,6,2>, LHS
+ 3852915140U, // <7,6,2,1>: Cost 4 vsldoi12 RHS, <6,2,1,3>
+ 2257973754U, // <7,6,2,2>: Cost 3 vmrghw <7,2,3,3>, <6,2,7,3>
+ 3841413589U, // <7,6,2,3>: Cost 4 vsldoi12 <2,6,3,7>, <6,2,3,2>
+ 2658184502U, // <7,6,2,4>: Cost 3 vsldoi4 <6,7,6,2>, RHS
+ 3852915176U, // <7,6,2,5>: Cost 4 vsldoi12 RHS, <6,2,5,3>
+ 2658186117U, // <7,6,2,6>: Cost 3 vsldoi4 <6,7,6,2>, <6,7,6,2>
+ 1705431546U, // <7,6,2,7>: Cost 2 vsldoi12 RHS, <6,2,7,3>
+ 1705579011U, // <7,6,2,u>: Cost 2 vsldoi12 RHS, <6,2,u,3>
+ 3714015334U, // <7,6,3,0>: Cost 4 vsldoi4 <3,7,6,3>, LHS
+ 3777243425U, // <7,6,3,1>: Cost 4 vsldoi8 <3,1,7,6>, <3,1,7,6>
+ 2319405957U, // <7,6,3,2>: Cost 3 vmrglw <6,2,7,3>, <6,7,6,2>
+ 3375229286U, // <7,6,3,3>: Cost 4 vmrglw <3,2,7,3>, <3,2,6,3>
+ 2779173426U, // <7,6,3,4>: Cost 3 vsldoi12 RHS, <6,3,4,5>
+ 3375228721U, // <7,6,3,5>: Cost 4 vmrglw <3,2,7,3>, <2,4,6,5>
+ 2319405880U, // <7,6,3,6>: Cost 3 vmrglw <6,2,7,3>, <6,6,6,6>
+ 1245662518U, // <7,6,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1245662519U, // <7,6,3,u>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 3852915291U, // <7,6,4,0>: Cost 4 vsldoi12 RHS, <6,4,0,1>
+ 3389834729U, // <7,6,4,1>: Cost 4 vmrglw <5,6,7,4>, <2,0,6,1>
+ 2259472890U, // <7,6,4,2>: Cost 3 vmrghw <7,4,5,6>, <6,2,7,3>
+ 3852915321U, // <7,6,4,3>: Cost 4 vsldoi12 RHS, <6,4,3,4>
+ 3852915330U, // <7,6,4,4>: Cost 4 vsldoi12 RHS, <6,4,4,4>
+ 2779173517U, // <7,6,4,5>: Cost 3 vsldoi12 RHS, <6,4,5,6>
+ 2316096312U, // <7,6,4,6>: Cost 3 vmrglw <5,6,7,4>, <6,6,6,6>
+ 1242352950U, // <7,6,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1242352951U, // <7,6,4,u>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 3852915372U, // <7,6,5,0>: Cost 4 vsldoi12 RHS, <6,5,0,1>
+ 3835294392U, // <7,6,5,1>: Cost 5 vsldoi12 <1,6,1,7>, <6,5,1,4>
+ 3852915395U, // <7,6,5,2>: Cost 4 vsldoi12 RHS, <6,5,2,6>
+ 3852915404U, // <7,6,5,3>: Cost 4 vsldoi12 RHS, <6,5,3,6>
+ 3852915412U, // <7,6,5,4>: Cost 4 vsldoi12 RHS, <6,5,4,5>
+ 3377899313U, // <7,6,5,5>: Cost 4 vmrglw <3,6,7,5>, <2,4,6,5>
+ 2718765160U, // <7,6,5,6>: Cost 3 vsldoi8 <5,6,7,6>, <5,6,7,6>
+ 2779173611U, // <7,6,5,7>: Cost 3 vsldoi12 RHS, <6,5,7,1>
+ 2779321076U, // <7,6,5,u>: Cost 3 vsldoi12 RHS, <6,5,u,1>
+ 2658213990U, // <7,6,6,0>: Cost 3 vsldoi4 <6,7,6,6>, LHS
+ 3852915462U, // <7,6,6,1>: Cost 4 vsldoi12 RHS, <6,6,1,1>
+ 2718765562U, // <7,6,6,2>: Cost 3 vsldoi8 <5,6,7,6>, <6,2,7,3>
+ 3714042622U, // <7,6,6,3>: Cost 4 vsldoi4 <3,7,6,6>, <3,7,6,6>
+ 2658217270U, // <7,6,6,4>: Cost 3 vsldoi4 <6,7,6,6>, RHS
+ 2724074224U, // <7,6,6,5>: Cost 3 vsldoi8 <6,5,7,6>, <6,5,7,6>
+ 1705431864U, // <7,6,6,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705431874U, // <7,6,6,7>: Cost 2 vsldoi12 RHS, <6,6,7,7>
+ 1705579339U, // <7,6,6,u>: Cost 2 vsldoi12 RHS, <6,6,u,7>
+ 1705431886U, // <7,6,7,0>: Cost 2 vsldoi12 RHS, <6,7,0,1>
+ 2779173719U, // <7,6,7,1>: Cost 3 vsldoi12 RHS, <6,7,1,1>
+ 2779173729U, // <7,6,7,2>: Cost 3 vsldoi12 RHS, <6,7,2,2>
+ 2779173736U, // <7,6,7,3>: Cost 3 vsldoi12 RHS, <6,7,3,0>
+ 1705431926U, // <7,6,7,4>: Cost 2 vsldoi12 RHS, <6,7,4,5>
+ 2779173759U, // <7,6,7,5>: Cost 3 vsldoi12 RHS, <6,7,5,5>
+ 2779173765U, // <7,6,7,6>: Cost 3 vsldoi12 RHS, <6,7,6,2>
+ 1248349494U, // <7,6,7,7>: Cost 2 vmrglw <6,6,7,7>, RHS
+ 1705431958U, // <7,6,7,u>: Cost 2 vsldoi12 RHS, <6,7,u,1>
+ 1705579423U, // <7,6,u,0>: Cost 2 vsldoi12 RHS, <6,u,0,1>
+ 2779173801U, // <7,6,u,1>: Cost 3 vsldoi12 RHS, <6,u,1,2>
+ 2779321266U, // <7,6,u,2>: Cost 3 vsldoi12 RHS, <6,u,2,2>
+ 2779321273U, // <7,6,u,3>: Cost 3 vsldoi12 RHS, <6,u,3,0>
+ 1705579463U, // <7,6,u,4>: Cost 2 vsldoi12 RHS, <6,u,4,5>
+ 2779173841U, // <7,6,u,5>: Cost 3 vsldoi12 RHS, <6,u,5,6>
+ 1705431864U, // <7,6,u,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705432032U, // <7,6,u,7>: Cost 2 vsldoi12 RHS, <6,u,7,3>
+ 1705579495U, // <7,6,u,u>: Cost 2 vsldoi12 RHS, <6,u,u,1>
+ 1242320994U, // <7,7,0,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705432058U, // <7,7,0,1>: Cost 2 vsldoi12 RHS, <7,0,1,2>
+ 3841414146U, // <7,7,0,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,0,2,1>
+ 2316063226U, // <7,7,0,3>: Cost 3 vmrglw <5,6,7,0>, <6,2,7,3>
+ 2779173908U, // <7,7,0,4>: Cost 3 vsldoi12 RHS, <7,0,4,1>
+ 2658242658U, // <7,7,0,5>: Cost 3 vsldoi4 <6,7,7,0>, <5,6,7,0>
+ 2658243468U, // <7,7,0,6>: Cost 3 vsldoi4 <6,7,7,0>, <6,7,7,0>
+ 2316063554U, // <7,7,0,7>: Cost 3 vmrglw <5,6,7,0>, <6,6,7,7>
+ 1705432121U, // <7,7,0,u>: Cost 2 vsldoi12 RHS, <7,0,u,2>
+ 3852915777U, // <7,7,1,0>: Cost 4 vsldoi12 RHS, <7,1,0,1>
+ 2779173962U, // <7,7,1,1>: Cost 3 vsldoi12 RHS, <7,1,1,1>
+ 2779173973U, // <7,7,1,2>: Cost 3 vsldoi12 RHS, <7,1,2,3>
+ 3389813242U, // <7,7,1,3>: Cost 4 vmrglw <5,6,7,1>, <6,2,7,3>
+ 3852915813U, // <7,7,1,4>: Cost 4 vsldoi12 RHS, <7,1,4,1>
+ 3852915821U, // <7,7,1,5>: Cost 4 vsldoi12 RHS, <7,1,5,0>
+ 3835294839U, // <7,7,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <7,1,6,1>
+ 2329343596U, // <7,7,1,7>: Cost 3 vmrglw <7,u,7,1>, <7,7,7,7>
+ 2779174027U, // <7,7,1,u>: Cost 3 vsldoi12 RHS, <7,1,u,3>
+ 2803061908U, // <7,7,2,0>: Cost 3 vsldoi12 RHS, <7,2,0,3>
+ 3852915869U, // <7,7,2,1>: Cost 4 vsldoi12 RHS, <7,2,1,3>
+ 2779174053U, // <7,7,2,2>: Cost 3 vsldoi12 RHS, <7,2,2,2>
+ 2779174060U, // <7,7,2,3>: Cost 3 vsldoi12 RHS, <7,2,3,0>
+ 2803061944U, // <7,7,2,4>: Cost 3 vsldoi12 RHS, <7,2,4,3>
+ 3852915905U, // <7,7,2,5>: Cost 4 vsldoi12 RHS, <7,2,5,3>
+ 2767672522U, // <7,7,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <7,2,6,3>
+ 2791855315U, // <7,7,2,7>: Cost 3 vsldoi12 <6,6,7,7>, <7,2,7,3>
+ 2768999644U, // <7,7,2,u>: Cost 3 vsldoi12 <2,u,3,7>, <7,2,u,3>
+ 2779174115U, // <7,7,3,0>: Cost 3 vsldoi12 RHS, <7,3,0,1>
+ 3852915948U, // <7,7,3,1>: Cost 4 vsldoi12 RHS, <7,3,1,1>
+ 3841414394U, // <7,7,3,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,3,2,6>
+ 1245663738U, // <7,7,3,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174155U, // <7,7,3,4>: Cost 3 vsldoi12 RHS, <7,3,4,5>
+ 3852915988U, // <7,7,3,5>: Cost 4 vsldoi12 RHS, <7,3,5,5>
+ 2706827959U, // <7,7,3,6>: Cost 3 vsldoi8 <3,6,7,7>, <3,6,7,7>
+ 2319405890U, // <7,7,3,7>: Cost 3 vmrglw <6,2,7,3>, <6,6,7,7>
+ 1245663738U, // <7,7,3,u>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174200U, // <7,7,4,0>: Cost 3 vsldoi12 RHS, <7,4,0,5>
+ 3852916030U, // <7,7,4,1>: Cost 4 vsldoi12 RHS, <7,4,1,2>
+ 3714099130U, // <7,7,4,2>: Cost 4 vsldoi4 <3,7,7,4>, <2,6,3,7>
+ 2316095994U, // <7,7,4,3>: Cost 3 vmrglw <5,6,7,4>, <6,2,7,3>
+ 1242353766U, // <7,7,4,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705432422U, // <7,7,4,5>: Cost 2 vsldoi12 RHS, <7,4,5,6>
+ 2658276240U, // <7,7,4,6>: Cost 3 vsldoi4 <6,7,7,4>, <6,7,7,4>
+ 2316096322U, // <7,7,4,7>: Cost 3 vmrglw <5,6,7,4>, <6,6,7,7>
+ 1705432449U, // <7,7,4,u>: Cost 2 vsldoi12 RHS, <7,4,u,6>
+ 3852916101U, // <7,7,5,0>: Cost 4 vsldoi12 RHS, <7,5,0,1>
+ 3854906765U, // <7,7,5,1>: Cost 4 vsldoi12 RHS, <7,5,1,0>
+ 3852916121U, // <7,7,5,2>: Cost 4 vsldoi12 RHS, <7,5,2,3>
+ 3389846010U, // <7,7,5,3>: Cost 4 vmrglw <5,6,7,5>, <6,2,7,3>
+ 3852916141U, // <7,7,5,4>: Cost 4 vsldoi12 RHS, <7,5,4,5>
+ 2779174326U, // <7,7,5,5>: Cost 3 vsldoi12 RHS, <7,5,5,5>
+ 2779174337U, // <7,7,5,6>: Cost 3 vsldoi12 RHS, <7,5,6,7>
+ 2329376364U, // <7,7,5,7>: Cost 3 vmrglw <7,u,7,5>, <7,7,7,7>
+ 2779321811U, // <7,7,5,u>: Cost 3 vsldoi12 RHS, <7,5,u,7>
+ 2658287718U, // <7,7,6,0>: Cost 3 vsldoi4 <6,7,7,6>, LHS
+ 3852916197U, // <7,7,6,1>: Cost 4 vsldoi12 RHS, <7,6,1,7>
+ 2779174382U, // <7,7,6,2>: Cost 3 vsldoi12 RHS, <7,6,2,7>
+ 2316112378U, // <7,7,6,3>: Cost 3 vmrglw <5,6,7,6>, <6,2,7,3>
+ 2658290998U, // <7,7,6,4>: Cost 3 vsldoi4 <6,7,7,6>, RHS
+ 3852916233U, // <7,7,6,5>: Cost 4 vsldoi12 RHS, <7,6,5,7>
+ 1651004226U, // <7,7,6,6>: Cost 2 vsldoi8 <6,6,7,7>, <6,6,7,7>
+ 2779174420U, // <7,7,6,7>: Cost 3 vsldoi12 RHS, <7,6,7,0>
+ 1652331492U, // <7,7,6,u>: Cost 2 vsldoi8 <6,u,7,7>, <6,u,7,7>
+ 1590526054U, // <7,7,7,0>: Cost 2 vsldoi4 <7,7,7,7>, LHS
+ 2328728623U, // <7,7,7,1>: Cost 3 vmrglw <7,7,7,7>, <7,0,7,1>
+ 2724746451U, // <7,7,7,2>: Cost 3 vsldoi8 <6,6,7,7>, <7,2,7,3>
+ 2322092538U, // <7,7,7,3>: Cost 3 vmrglw <6,6,7,7>, <6,2,7,3>
+ 1590529334U, // <7,7,7,4>: Cost 2 vsldoi4 <7,7,7,7>, RHS
+ 2328728951U, // <7,7,7,5>: Cost 3 vmrglw <7,7,7,7>, <7,4,7,5>
+ 2724746770U, // <7,7,7,6>: Cost 3 vsldoi8 <6,6,7,7>, <7,6,6,7>
+ 430361910U, // <7,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,7,u>: Cost 1 vspltisw3 RHS
+ 1242320994U, // <7,7,u,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705580162U, // <7,7,u,1>: Cost 2 vsldoi12 RHS, <7,u,1,2>
+ 2779321996U, // <7,7,u,2>: Cost 3 vsldoi12 RHS, <7,u,2,3>
+ 1245663738U, // <7,7,u,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 1242353766U, // <7,7,u,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705580202U, // <7,7,u,5>: Cost 2 vsldoi12 RHS, <7,u,5,6>
+ 1662949620U, // <7,7,u,6>: Cost 2 vsldoi8 <u,6,7,7>, <u,6,7,7>
+ 430361910U, // <7,7,u,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,u,u>: Cost 1 vspltisw3 RHS
+ 1705426944U, // <7,u,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705432787U, // <7,u,0,1>: Cost 2 vsldoi12 RHS, <u,0,1,2>
+ 2316060885U, // <7,u,0,2>: Cost 3 vmrglw <5,6,7,0>, <3,0,u,2>
+ 1242316956U, // <7,u,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 2779174637U, // <7,u,0,4>: Cost 3 vsldoi12 RHS, <u,0,4,1>
+ 1182750874U, // <7,u,0,5>: Cost 2 vmrghw <7,0,1,2>, RHS
+ 2316061213U, // <7,u,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,u,6>
+ 1242320200U, // <7,u,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1705432850U, // <7,u,0,u>: Cost 2 vsldoi12 RHS, <u,0,u,2>
+ 1584578662U, // <7,u,1,0>: Cost 2 vsldoi4 <6,7,u,1>, LHS
+ 1705427764U, // <7,u,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 631691054U, // <7,u,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2640407307U, // <7,u,1,3>: Cost 3 vsldoi4 <3,7,u,1>, <3,7,u,1>
+ 1584581942U, // <7,u,1,4>: Cost 2 vsldoi4 <6,7,u,1>, RHS
+ 2779174726U, // <7,u,1,5>: Cost 3 vsldoi12 RHS, <u,1,5,0>
+ 1584583574U, // <7,u,1,6>: Cost 2 vsldoi4 <6,7,u,1>, <6,7,u,1>
+ 2779322201U, // <7,u,1,7>: Cost 3 vsldoi12 RHS, <u,1,7,1>
+ 631691108U, // <7,u,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779174763U, // <7,u,2,0>: Cost 3 vsldoi12 RHS, <u,2,0,1>
+ 2779174774U, // <7,u,2,1>: Cost 3 vsldoi12 RHS, <u,2,1,3>
+ 1705428584U, // <7,u,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705432965U, // <7,u,2,3>: Cost 2 vsldoi12 RHS, <u,2,3,0>
+ 2779174801U, // <7,u,2,4>: Cost 3 vsldoi12 RHS, <u,2,4,3>
+ 2779174810U, // <7,u,2,5>: Cost 3 vsldoi12 RHS, <u,2,5,3>
+ 2767673251U, // <7,u,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <u,2,6,3>
+ 1705580460U, // <7,u,2,7>: Cost 2 vsldoi12 RHS, <u,2,7,3>
+ 1705433010U, // <7,u,2,u>: Cost 2 vsldoi12 RHS, <u,2,u,0>
+ 1705433020U, // <7,u,3,0>: Cost 2 vsldoi12 RHS, <u,3,0,1>
+ 2779174853U, // <7,u,3,1>: Cost 3 vsldoi12 RHS, <u,3,1,1>
+ 2767673299U, // <7,u,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <u,3,2,6>
+ 1245659292U, // <7,u,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705433060U, // <7,u,3,4>: Cost 2 vsldoi12 RHS, <u,3,4,5>
+ 2779174893U, // <7,u,3,5>: Cost 3 vsldoi12 RHS, <u,3,5,5>
+ 2706836152U, // <7,u,3,6>: Cost 3 vsldoi8 <3,6,7,u>, <3,6,7,u>
+ 1245662536U, // <7,u,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1705433092U, // <7,u,3,u>: Cost 2 vsldoi12 RHS, <u,3,u,1>
+ 2779174925U, // <7,u,4,0>: Cost 3 vsldoi12 RHS, <u,4,0,1>
+ 1185732398U, // <7,u,4,1>: Cost 2 vmrghw <7,4,5,6>, LHS
+ 2316093653U, // <7,u,4,2>: Cost 3 vmrglw <5,6,7,4>, <3,0,u,2>
+ 1242349724U, // <7,u,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 1705430224U, // <7,u,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705433151U, // <7,u,4,5>: Cost 2 vsldoi12 RHS, <u,4,5,6>
+ 2316093981U, // <7,u,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,u,6>
+ 1242352968U, // <7,u,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1705433178U, // <7,u,4,u>: Cost 2 vsldoi12 RHS, <u,4,u,6>
+ 1584611430U, // <7,u,5,0>: Cost 2 vsldoi4 <6,7,u,5>, LHS
+ 2781165670U, // <7,u,5,1>: Cost 3 vsldoi12 RHS, <u,5,1,0>
+ 2640439226U, // <7,u,5,2>: Cost 3 vsldoi4 <3,7,u,5>, <2,6,3,7>
+ 2640440079U, // <7,u,5,3>: Cost 3 vsldoi4 <3,7,u,5>, <3,7,u,5>
+ 1584614710U, // <7,u,5,4>: Cost 2 vsldoi4 <6,7,u,5>, RHS
+ 1705431044U, // <7,u,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 631691418U, // <7,u,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2779322525U, // <7,u,5,7>: Cost 3 vsldoi12 RHS, <u,5,7,1>
+ 631691436U, // <7,u,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 2779175087U, // <7,u,6,0>: Cost 3 vsldoi12 RHS, <u,6,0,1>
+ 2779175102U, // <7,u,6,1>: Cost 3 vsldoi12 RHS, <u,6,1,7>
+ 1648357887U, // <7,u,6,2>: Cost 2 vsldoi8 <6,2,7,u>, <6,2,7,u>
+ 1705433296U, // <7,u,6,3>: Cost 2 vsldoi12 RHS, <u,6,3,7>
+ 2779175127U, // <7,u,6,4>: Cost 3 vsldoi12 RHS, <u,6,4,5>
+ 2779175138U, // <7,u,6,5>: Cost 3 vsldoi12 RHS, <u,6,5,7>
+ 1651012419U, // <7,u,6,6>: Cost 2 vsldoi8 <6,6,7,u>, <6,6,7,u>
+ 1705580788U, // <7,u,6,7>: Cost 2 vsldoi12 RHS, <u,6,7,7>
+ 1705433341U, // <7,u,6,u>: Cost 2 vsldoi12 RHS, <u,6,u,7>
+ 1705580800U, // <7,u,7,0>: Cost 2 vsldoi12 RHS, <u,7,0,1>
+ 1187878702U, // <7,u,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2768042263U, // <7,u,7,2>: Cost 3 vsldoi12 <2,6,u,7>, <u,7,2,6>
+ 1248346268U, // <7,u,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705580840U, // <7,u,7,4>: Cost 2 vsldoi12 RHS, <u,7,4,5>
+ 1187879066U, // <7,u,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2779322679U, // <7,u,7,6>: Cost 3 vsldoi12 RHS, <u,7,6,2>
+ 430361910U, // <7,u,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,u,7,u>: Cost 1 vspltisw3 RHS
+ 1705433425U, // <7,u,u,0>: Cost 2 vsldoi12 RHS, <u,u,0,1>
+ 1705433435U, // <7,u,u,1>: Cost 2 vsldoi12 RHS, <u,u,1,2>
+ 631691621U, // <7,u,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 1705433451U, // <7,u,u,3>: Cost 2 vsldoi12 RHS, <u,u,3,0>
+ 1705433465U, // <7,u,u,4>: Cost 2 vsldoi12 RHS, <u,u,4,5>
+ 1705433475U, // <7,u,u,5>: Cost 2 vsldoi12 RHS, <u,u,5,6>
+ 631691661U, // <7,u,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 430361910U, // <7,u,u,7>: Cost 1 vspltisw3 RHS
+ 631691675U, // <7,u,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 202162278U, // <u,0,0,0>: Cost 1 vspltisw0 LHS
+ 1678598154U, // <u,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2634500154U, // <u,0,0,2>: Cost 3 vsldoi4 <2,u,0,0>, <2,u,0,0>
+ 2289596269U, // <u,0,0,3>: Cost 3 vmrglw <1,2,u,0>, <u,2,0,3>
+ 1548815670U, // <u,0,0,4>: Cost 2 vsldoi4 <0,u,0,0>, RHS
+ 2663698530U, // <u,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2658390942U, // <u,0,0,6>: Cost 3 vsldoi4 <6,u,0,0>, <6,u,0,0>
+ 2289596597U, // <u,0,0,7>: Cost 3 vmrglw <1,2,u,0>, <u,6,0,7>
+ 202162278U, // <u,0,0,u>: Cost 1 vspltisw0 LHS
+ 1560764518U, // <u,0,1,0>: Cost 2 vsldoi4 <2,u,0,1>, LHS
+ 115720294U, // <u,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 604856427U, // <u,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2634508438U, // <u,0,1,3>: Cost 3 vsldoi4 <2,u,0,1>, <3,0,1,2>
+ 1560767798U, // <u,0,1,4>: Cost 2 vsldoi4 <2,u,0,1>, RHS
+ 2652426438U, // <u,0,1,5>: Cost 3 vsldoi4 <5,u,0,1>, <5,u,0,1>
+ 1584657311U, // <u,0,1,6>: Cost 2 vsldoi4 <6,u,0,1>, <6,u,0,1>
+ 2658399226U, // <u,0,1,7>: Cost 3 vsldoi4 <6,u,0,1>, <7,0,1,2>
+ 604856476U, // <u,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696889850U, // <u,0,2,0>: Cost 3 vsldoi8 <2,0,u,0>, <2,0,u,0>
+ 1190174822U, // <u,0,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 2692245096U, // <u,0,2,2>: Cost 3 vsldoi8 <1,2,u,0>, <2,2,2,2>
+ 2692245158U, // <u,0,2,3>: Cost 3 vsldoi8 <1,2,u,0>, <2,3,0,1>
+ 2263916882U, // <u,0,2,4>: Cost 3 vmrghw <u,2,3,0>, <0,4,1,5>
+ 2299709908U, // <u,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 2692245434U, // <u,0,2,6>: Cost 3 vsldoi8 <1,2,u,0>, <2,6,3,7>
+ 2701535281U, // <u,0,2,7>: Cost 3 vsldoi8 <2,7,u,0>, <2,7,u,0>
+ 1190175389U, // <u,0,2,u>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 1209237504U, // <u,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1209239206U, // <u,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2704189813U, // <u,0,3,2>: Cost 3 vsldoi8 <3,2,u,0>, <3,2,u,0>
+ 2692245916U, // <u,0,3,3>: Cost 3 vsldoi8 <1,2,u,0>, <3,3,3,3>
+ 2282981033U, // <u,0,3,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2664386658U, // <u,0,3,5>: Cost 3 vsldoi4 <7,u,0,3>, <5,6,7,0>
+ 2691877496U, // <u,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 2664388218U, // <u,0,3,7>: Cost 3 vsldoi4 <7,u,0,3>, <7,u,0,3>
+ 1209239213U, // <u,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 2289623040U, // <u,0,4,0>: Cost 3 vmrglw <1,2,u,4>, <0,0,0,0>
+ 1678598482U, // <u,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2634532926U, // <u,0,4,2>: Cost 3 vsldoi4 <2,u,0,4>, <2,u,0,4>
+ 2235580672U, // <u,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 1143619922U, // <u,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1618505014U, // <u,0,4,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 2658423714U, // <u,0,4,6>: Cost 3 vsldoi4 <6,u,0,4>, <6,u,0,4>
+ 2713259464U, // <u,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1683243409U, // <u,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 1192443904U, // <u,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 118702182U, // <u,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2266185901U, // <u,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2640513816U, // <u,0,5,3>: Cost 3 vsldoi4 <3,u,0,5>, <3,u,0,5>
+ 1192444242U, // <u,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2718789636U, // <u,0,5,5>: Cost 3 vsldoi8 <5,6,u,0>, <5,5,5,5>
+ 1645047915U, // <u,0,5,6>: Cost 2 vsldoi8 <5,6,u,0>, <5,6,u,0>
+ 2664404604U, // <u,0,5,7>: Cost 3 vsldoi4 <7,u,0,5>, <7,u,0,5>
+ 118702749U, // <u,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 2302910464U, // <u,0,6,0>: Cost 3 vmrglw <3,4,u,6>, <0,0,0,0>
+ 1192886374U, // <u,0,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 2718790138U, // <u,0,6,2>: Cost 3 vsldoi8 <5,6,u,0>, <6,2,7,3>
+ 2722771537U, // <u,0,6,3>: Cost 3 vsldoi8 <6,3,u,0>, <6,3,u,0>
+ 2266628434U, // <u,0,6,4>: Cost 3 vmrghw <u,6,3,7>, <0,4,1,5>
+ 2248950180U, // <u,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 2718790456U, // <u,0,6,6>: Cost 3 vsldoi8 <5,6,u,0>, <6,6,6,6>
+ 2718790478U, // <u,0,6,7>: Cost 3 vsldoi8 <5,6,u,0>, <6,7,0,1>
+ 1192886941U, // <u,0,6,u>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1235812352U, // <u,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235814054U, // <u,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 2728080601U, // <u,0,7,2>: Cost 3 vsldoi8 <7,2,u,0>, <7,2,u,0>
+ 2640530202U, // <u,0,7,3>: Cost 3 vsldoi4 <3,u,0,7>, <3,u,0,7>
+ 2640530742U, // <u,0,7,4>: Cost 3 vsldoi4 <3,u,0,7>, RHS
+ 2309556692U, // <u,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 2730735133U, // <u,0,7,6>: Cost 3 vsldoi8 <7,6,u,0>, <7,6,u,0>
+ 2309556856U, // <u,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235814061U, // <u,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 202162278U, // <u,0,u,0>: Cost 1 vspltisw0 LHS
+ 120365158U, // <u,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 604856989U, // <u,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2692249532U, // <u,0,u,3>: Cost 3 vsldoi8 <1,2,u,0>, <u,3,0,1>
+ 1560825142U, // <u,0,u,4>: Cost 2 vsldoi4 <2,u,0,u>, RHS
+ 1618507930U, // <u,0,u,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 1584714662U, // <u,0,u,6>: Cost 2 vsldoi4 <6,u,0,u>, <6,u,0,u>
+ 2309565048U, // <u,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 604857043U, // <u,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 1611210825U, // <u,1,0,0>: Cost 2 vsldoi8 <0,0,u,1>, <0,0,u,1>
+ 1616519270U, // <u,1,0,1>: Cost 2 vsldoi8 <0,u,u,1>, LHS
+ 2287605459U, // <u,1,0,2>: Cost 3 vmrglw <0,u,u,0>, <u,0,1,2>
+ 2640546588U, // <u,1,0,3>: Cost 3 vsldoi4 <3,u,1,0>, <3,u,1,0>
+ 2622631222U, // <u,1,0,4>: Cost 3 vsldoi4 <0,u,1,0>, RHS
+ 2289590610U, // <u,1,0,5>: Cost 3 vmrglw <1,2,u,0>, <0,4,1,5>
+ 2664436630U, // <u,1,0,6>: Cost 3 vsldoi4 <7,u,1,0>, <6,7,u,1>
+ 2664437376U, // <u,1,0,7>: Cost 3 vsldoi4 <7,u,1,0>, <7,u,1,0>
+ 1616519889U, // <u,1,0,u>: Cost 2 vsldoi8 <0,u,u,1>, <0,u,u,1>
+ 1548894866U, // <u,1,1,0>: Cost 2 vsldoi4 <0,u,1,1>, <0,u,1,1>
+ 269271142U, // <u,1,1,1>: Cost 1 vspltisw1 LHS
+ 1189462934U, // <u,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2622638230U, // <u,1,1,3>: Cost 3 vsldoi4 <0,u,1,1>, <3,0,1,2>
+ 1548897590U, // <u,1,1,4>: Cost 2 vsldoi4 <0,u,1,1>, RHS
+ 2756985692U, // <u,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 2658472872U, // <u,1,1,6>: Cost 3 vsldoi4 <6,u,1,1>, <6,u,1,1>
+ 2287614142U, // <u,1,1,7>: Cost 3 vmrglw <0,u,u,1>, <u,6,1,7>
+ 269271142U, // <u,1,1,u>: Cost 1 vspltisw1 LHS
+ 1566818406U, // <u,1,2,0>: Cost 2 vsldoi4 <3,u,1,2>, LHS
+ 2756985735U, // <u,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 1148371862U, // <u,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1566821686U, // <u,1,2,4>: Cost 2 vsldoi4 <3,u,1,2>, RHS
+ 2756985771U, // <u,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 2690262970U, // <u,1,2,6>: Cost 3 vsldoi8 <0,u,u,1>, <2,6,3,7>
+ 1590711938U, // <u,1,2,7>: Cost 2 vsldoi4 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 2282979337U, // <u,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1209237514U, // <u,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1209239702U, // <u,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282979502U, // <u,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282979341U, // <u,1,3,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1209237842U, // <u,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282979505U, // <u,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287625423U, // <u,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1209237521U, // <u,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 1635101613U, // <u,1,4,0>: Cost 2 vsldoi8 <4,0,u,1>, <4,0,u,1>
+ 2289623050U, // <u,1,4,1>: Cost 3 vmrglw <1,2,u,4>, <0,0,1,1>
+ 2289625238U, // <u,1,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,1,2>
+ 2640579360U, // <u,1,4,3>: Cost 3 vsldoi4 <3,u,1,4>, <3,u,1,4>
+ 2622663990U, // <u,1,4,4>: Cost 3 vsldoi4 <0,u,1,4>, RHS
+ 1616522550U, // <u,1,4,5>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 2664469398U, // <u,1,4,6>: Cost 3 vsldoi4 <7,u,1,4>, <6,7,u,1>
+ 2664470148U, // <u,1,4,7>: Cost 3 vsldoi4 <7,u,1,4>, <7,u,1,4>
+ 1616522793U, // <u,1,4,u>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 1548927638U, // <u,1,5,0>: Cost 2 vsldoi4 <0,u,1,5>, <0,u,1,5>
+ 1192444724U, // <u,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1192444822U, // <u,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2622670998U, // <u,1,5,3>: Cost 3 vsldoi4 <0,u,1,5>, <3,0,1,2>
+ 1548930358U, // <u,1,5,4>: Cost 2 vsldoi4 <0,u,1,5>, RHS
+ 1210728786U, // <u,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2714153058U, // <u,1,5,6>: Cost 3 vsldoi8 <4,u,u,1>, <5,6,7,0>
+ 2670449658U, // <u,1,5,7>: Cost 3 vsldoi4 <u,u,1,5>, <7,0,1,2>
+ 1548932910U, // <u,1,5,u>: Cost 2 vsldoi4 <0,u,1,5>, LHS
+ 2622677655U, // <u,1,6,0>: Cost 3 vsldoi4 <0,u,1,6>, <0,u,1,6>
+ 2756986063U, // <u,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2302912662U, // <u,1,6,2>: Cost 3 vmrglw <3,4,u,6>, <3,0,1,2>
+ 3696421014U, // <u,1,6,3>: Cost 4 vsldoi4 <0,u,1,6>, <3,0,1,2>
+ 2622680374U, // <u,1,6,4>: Cost 3 vsldoi4 <0,u,1,6>, RHS
+ 2756986099U, // <u,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 2714153784U, // <u,1,6,6>: Cost 3 vsldoi8 <4,u,u,1>, <6,6,6,6>
+ 1651692438U, // <u,1,6,7>: Cost 2 vsldoi8 <6,7,u,1>, <6,7,u,1>
+ 1652356071U, // <u,1,6,u>: Cost 2 vsldoi8 <6,u,u,1>, <6,u,u,1>
+ 2628657254U, // <u,1,7,0>: Cost 3 vsldoi4 <1,u,1,7>, LHS
+ 1235812362U, // <u,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235814550U, // <u,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309554350U, // <u,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2628660534U, // <u,1,7,4>: Cost 3 vsldoi4 <1,u,1,7>, RHS
+ 1235812690U, // <u,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309554353U, // <u,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309554678U, // <u,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235812369U, // <u,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 1548952217U, // <u,1,u,0>: Cost 2 vsldoi4 <0,u,1,u>, <0,u,1,u>
+ 269271142U, // <u,1,u,1>: Cost 1 vspltisw1 LHS
+ 1209280662U, // <u,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1548954934U, // <u,1,u,4>: Cost 2 vsldoi4 <0,u,1,u>, RHS
+ 1209278802U, // <u,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2283020465U, // <u,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 1590761096U, // <u,1,u,7>: Cost 2 vsldoi4 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 2702876672U, // <u,2,0,0>: Cost 3 vsldoi8 <3,0,u,2>, <0,0,0,0>
+ 1629134950U, // <u,2,0,1>: Cost 2 vsldoi8 <3,0,u,2>, LHS
+ 2289591912U, // <u,2,0,2>: Cost 3 vmrglw <1,2,u,0>, <2,2,2,2>
+ 1215848550U, // <u,2,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2702877010U, // <u,2,0,4>: Cost 3 vsldoi8 <3,0,u,2>, <0,4,1,5>
+ 2289222708U, // <u,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2779178473U, // <u,2,0,6>: Cost 3 vsldoi12 RHS, <2,0,6,1>
+ 2726249024U, // <u,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1215848555U, // <u,2,0,u>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2690933539U, // <u,2,1,0>: Cost 3 vsldoi8 <1,0,u,2>, <1,0,u,2>
+ 2628683124U, // <u,2,1,1>: Cost 3 vsldoi4 <1,u,2,1>, <1,u,2,1>
+ 1189463656U, // <u,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1213866086U, // <u,2,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 2628685110U, // <u,2,1,4>: Cost 3 vsldoi4 <1,u,2,1>, RHS
+ 2263205736U, // <u,2,1,5>: Cost 3 vmrghw LHS, <2,5,3,6>
+ 1189463994U, // <u,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2263205866U, // <u,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1213866091U, // <u,2,1,u>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1556938854U, // <u,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2697569869U, // <u,2,2,1>: Cost 3 vsldoi8 <2,1,u,2>, <2,1,u,2>
+ 336380006U, // <u,2,2,2>: Cost 1 vspltisw2 LHS
+ 1678599794U, // <u,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 1556942134U, // <u,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <u,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2702878650U, // <u,2,2,6>: Cost 3 vsldoi8 <3,0,u,2>, <2,6,3,7>
+ 2300229831U, // <u,2,2,7>: Cost 3 vmrglw <3,0,u,2>, <u,6,2,7>
+ 336380006U, // <u,2,2,u>: Cost 1 vspltisw2 LHS
+ 475243165U, // <u,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1548985140U, // <u,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1209239144U, // <u,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135495782U, // <u,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 475245878U, // <u,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1596764164U, // <u,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1596764666U, // <u,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1596765178U, // <u,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135495787U, // <u,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2708851630U, // <u,2,4,0>: Cost 3 vsldoi8 <4,0,u,2>, <4,0,u,2>
+ 2217362979U, // <u,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2289624680U, // <u,2,4,2>: Cost 3 vmrglw <1,2,u,4>, <2,2,2,2>
+ 1215881318U, // <u,2,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2726767824U, // <u,2,4,4>: Cost 3 vsldoi8 <7,0,u,2>, <4,4,4,4>
+ 1629138230U, // <u,2,4,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 2779178801U, // <u,2,4,6>: Cost 3 vsldoi12 RHS, <2,4,6,5>
+ 2726251976U, // <u,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1215881323U, // <u,2,4,u>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2628714598U, // <u,2,5,0>: Cost 3 vsldoi4 <1,u,2,5>, LHS
+ 2628715896U, // <u,2,5,1>: Cost 3 vsldoi4 <1,u,2,5>, <1,u,2,5>
+ 1192445544U, // <u,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1213898854U, // <u,2,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2628717878U, // <u,2,5,4>: Cost 3 vsldoi4 <1,u,2,5>, RHS
+ 2726768644U, // <u,2,5,5>: Cost 3 vsldoi8 <7,0,u,2>, <5,5,5,5>
+ 1192445882U, // <u,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2266187754U, // <u,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1213898859U, // <u,2,5,u>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2634694758U, // <u,2,6,0>: Cost 3 vsldoi4 <2,u,2,6>, LHS
+ 2721460657U, // <u,2,6,1>: Cost 3 vsldoi8 <6,1,u,2>, <6,1,u,2>
+ 2296940136U, // <u,2,6,2>: Cost 3 vmrglw <2,4,u,6>, <2,2,2,2>
+ 1678600122U, // <u,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2634698038U, // <u,2,6,4>: Cost 3 vsldoi4 <2,u,2,6>, RHS
+ 3370682125U, // <u,2,6,5>: Cost 4 vmrglw <2,4,u,6>, <2,4,2,5>
+ 1157056442U, // <u,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725442455U, // <u,2,6,7>: Cost 3 vsldoi8 <6,7,u,2>, <6,7,u,2>
+ 1678600167U, // <u,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 1653027897U, // <u,2,7,0>: Cost 2 vsldoi8 <7,0,u,2>, <7,0,u,2>
+ 2309554924U, // <u,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235813992U, // <u,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 162070630U, // <u,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2634706230U, // <u,2,7,4>: Cost 3 vsldoi4 <2,u,2,7>, RHS
+ 2309555252U, // <u,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309555901U, // <u,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309555416U, // <u,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 162070635U, // <u,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 475284130U, // <u,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1549026100U, // <u,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 336380006U, // <u,2,u,2>: Cost 1 vspltisw2 LHS
+ 135536742U, // <u,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 475286838U, // <u,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1629141146U, // <u,2,u,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 1194108858U, // <u,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 1596806138U, // <u,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135536747U, // <u,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611890688U, // <u,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 538149020U, // <u,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685632685U, // <u,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685632764U, // <u,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611891026U, // <u,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2733408722U, // <u,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2658612153U, // <u,3,0,6>: Cost 3 vsldoi4 <6,u,3,0>, <6,u,3,0>
+ 2289592250U, // <u,3,0,7>: Cost 3 vmrglw <1,2,u,0>, <2,6,3,7>
+ 538149533U, // <u,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1189464214U, // <u,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 1611891508U, // <u,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611891606U, // <u,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 1189464476U, // <u,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1189464578U, // <u,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2690278511U, // <u,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2690278607U, // <u,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2287609786U, // <u,3,1,7>: Cost 3 vmrglw <0,u,u,1>, <2,6,3,7>
+ 1611892092U, // <u,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2685634042U, // <u,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,u,0>
+ 2685634079U, // <u,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611892328U, // <u,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611892390U, // <u,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2685634371U, // <u,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,u,5>
+ 2685634453U, // <u,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,u,6>
+ 1611892666U, // <u,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2300225466U, // <u,3,2,7>: Cost 3 vmrglw <3,0,u,2>, <2,6,3,7>
+ 1611892795U, // <u,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,0,1>
+ 1209238422U, // <u,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282980247U, // <u,3,3,1>: Cost 3 vmrglw LHS, <1,2,3,1>
+ 1561004120U, // <u,3,3,2>: Cost 2 vsldoi4 <2,u,3,3>, <2,u,3,3>
+ 403488870U, // <u,3,3,3>: Cost 1 vspltisw3 LHS
+ 1209238426U, // <u,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282980899U, // <u,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2282985598U, // <u,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1209239482U, // <u,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 403488870U, // <u,3,3,u>: Cost 1 vspltisw3 LHS
+ 1555038310U, // <u,3,4,0>: Cost 2 vsldoi4 <1,u,3,4>, LHS
+ 1555039616U, // <u,3,4,1>: Cost 2 vsldoi4 <1,u,3,4>, <1,u,3,4>
+ 2628781672U, // <u,3,4,2>: Cost 3 vsldoi4 <1,u,3,4>, <2,2,2,2>
+ 2289624690U, // <u,3,4,3>: Cost 3 vmrglw <1,2,u,4>, <2,2,3,3>
+ 1555041590U, // <u,3,4,4>: Cost 2 vsldoi4 <1,u,3,4>, RHS
+ 538152246U, // <u,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2658644925U, // <u,3,4,6>: Cost 3 vsldoi4 <6,u,3,4>, <6,u,3,4>
+ 2289625018U, // <u,3,4,7>: Cost 3 vmrglw <1,2,u,4>, <2,6,3,7>
+ 538152489U, // <u,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1192446102U, // <u,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2733411983U, // <u,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2634762330U, // <u,3,5,2>: Cost 3 vsldoi4 <2,u,3,5>, <2,u,3,5>
+ 1192446364U, // <u,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1192446466U, // <u,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 1659670532U, // <u,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659670626U, // <u,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 2287642554U, // <u,3,5,7>: Cost 3 vmrglw <0,u,u,5>, <2,6,3,7>
+ 1659670788U, // <u,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2634768486U, // <u,3,6,0>: Cost 3 vsldoi4 <2,u,3,6>, LHS
+ 2733412775U, // <u,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1648390659U, // <u,3,6,2>: Cost 2 vsldoi8 <6,2,u,3>, <6,2,u,3>
+ 2634770973U, // <u,3,6,3>: Cost 3 vsldoi4 <2,u,3,6>, <3,4,u,6>
+ 2634771766U, // <u,3,6,4>: Cost 3 vsldoi4 <2,u,3,6>, RHS
+ 2733413099U, // <u,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659671352U, // <u,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659671374U, // <u,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1652372457U, // <u,3,6,u>: Cost 2 vsldoi8 <6,u,u,3>, <6,u,u,3>
+ 1561034854U, // <u,3,7,0>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 2634777396U, // <u,3,7,1>: Cost 3 vsldoi4 <2,u,3,7>, <1,1,1,1>
+ 1561036892U, // <u,3,7,2>: Cost 2 vsldoi4 <2,u,3,7>, <2,u,3,7>
+ 1235814002U, // <u,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1561038134U, // <u,3,7,4>: Cost 2 vsldoi4 <2,u,3,7>, RHS
+ 2309555747U, // <u,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2309556072U, // <u,3,7,6>: Cost 3 vmrglw RHS, <2,5,3,6>
+ 1235814330U, // <u,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1561040686U, // <u,3,7,u>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 1611896531U, // <u,3,u,0>: Cost 2 vsldoi8 LHS, <u,0,1,2>
+ 538154798U, // <u,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1611896712U, // <u,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,3>
+ 403488870U, // <u,3,u,3>: Cost 1 vspltisw3 LHS
+ 1611896895U, // <u,3,u,4>: Cost 2 vsldoi8 LHS, <u,4,5,6>
+ 538155162U, // <u,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1611897040U, // <u,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1209280442U, // <u,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 538155365U, // <u,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 1165118354U, // <u,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1618534502U, // <u,4,0,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 2634795102U, // <u,4,0,2>: Cost 3 vsldoi4 <2,u,4,0>, <2,u,4,0>
+ 2686451968U, // <u,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2692276562U, // <u,4,0,4>: Cost 3 vsldoi8 <1,2,u,4>, <0,4,1,5>
+ 1705438098U, // <u,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658685890U, // <u,4,0,6>: Cost 3 vsldoi4 <6,u,4,0>, <6,u,4,0>
+ 2256489928U, // <u,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1618535069U, // <u,4,0,u>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1189464978U, // <u,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2692277044U, // <u,4,1,1>: Cost 3 vsldoi8 <1,2,u,4>, <1,1,1,1>
+ 1618535367U, // <u,4,1,2>: Cost 2 vsldoi8 <1,2,u,4>, <1,2,u,4>
+ 2640775992U, // <u,4,1,3>: Cost 3 vsldoi4 <3,u,4,1>, <3,u,4,1>
+ 1189465296U, // <u,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 115723574U, // <u,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2263207289U, // <u,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2664666780U, // <u,4,1,7>: Cost 3 vsldoi4 <7,u,4,1>, <7,u,4,1>
+ 115723817U, // <u,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 2263919506U, // <u,4,2,0>: Cost 3 vmrghw <u,2,3,0>, <4,0,5,1>
+ 2222115812U, // <u,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 2692277864U, // <u,4,2,2>: Cost 3 vsldoi8 <1,2,u,4>, <2,2,2,2>
+ 2692277926U, // <u,4,2,3>: Cost 3 vsldoi8 <1,2,u,4>, <2,3,0,1>
+ 2324114640U, // <u,4,2,4>: Cost 3 vmrglw <7,0,u,2>, <4,4,4,4>
+ 1190178102U, // <u,4,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278202U, // <u,4,2,6>: Cost 3 vsldoi8 <1,2,u,4>, <2,6,3,7>
+ 2701568053U, // <u,4,2,7>: Cost 3 vsldoi8 <2,7,u,4>, <2,7,u,4>
+ 1190178345U, // <u,4,2,u>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278422U, // <u,4,3,0>: Cost 3 vsldoi8 <1,2,u,4>, <3,0,1,2>
+ 2282981552U, // <u,4,3,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2704222585U, // <u,4,3,2>: Cost 3 vsldoi8 <3,2,u,4>, <3,2,u,4>
+ 2692278684U, // <u,4,3,3>: Cost 3 vsldoi8 <1,2,u,4>, <3,3,3,3>
+ 1257016528U, // <u,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1209239246U, // <u,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2691910300U, // <u,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 2664683166U, // <u,4,3,7>: Cost 3 vsldoi4 <7,u,4,3>, <7,u,4,3>
+ 1209239249U, // <u,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 1573027942U, // <u,4,4,0>: Cost 2 vsldoi4 <4,u,4,4>, LHS
+ 2634826695U, // <u,4,4,1>: Cost 3 vsldoi4 <2,u,4,4>, <1,2,u,4>
+ 2634827874U, // <u,4,4,2>: Cost 3 vsldoi4 <2,u,4,4>, <2,u,4,4>
+ 2289629073U, // <u,4,4,3>: Cost 3 vmrglw <1,2,u,4>, <u,2,4,3>
+ 229035318U, // <u,4,4,4>: Cost 1 vspltisw0 RHS
+ 1618537782U, // <u,4,4,5>: Cost 2 vsldoi8 <1,2,u,4>, RHS
+ 2658718662U, // <u,4,4,6>: Cost 3 vsldoi4 <6,u,4,4>, <6,u,4,4>
+ 2289629401U, // <u,4,4,7>: Cost 3 vmrglw <1,2,u,4>, <u,6,4,7>
+ 229035318U, // <u,4,4,u>: Cost 1 vspltisw0 RHS
+ 1561092198U, // <u,4,5,0>: Cost 2 vsldoi4 <2,u,4,5>, LHS
+ 2628863370U, // <u,4,5,1>: Cost 3 vsldoi4 <1,u,4,5>, <1,u,4,5>
+ 1561094243U, // <u,4,5,2>: Cost 2 vsldoi4 <2,u,4,5>, <2,u,4,5>
+ 2634836118U, // <u,4,5,3>: Cost 3 vsldoi4 <2,u,4,5>, <3,0,1,2>
+ 1561095478U, // <u,4,5,4>: Cost 2 vsldoi4 <2,u,4,5>, RHS
+ 118705462U, // <u,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 604859702U, // <u,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2658726906U, // <u,4,5,7>: Cost 3 vsldoi4 <6,u,4,5>, <7,0,1,2>
+ 604859720U, // <u,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2266631058U, // <u,4,6,0>: Cost 3 vmrghw <u,6,3,7>, <4,0,5,1>
+ 2302692152U, // <u,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 2718822906U, // <u,4,6,2>: Cost 3 vsldoi8 <5,6,u,4>, <6,2,7,3>
+ 2722804309U, // <u,4,6,3>: Cost 3 vsldoi8 <6,3,u,4>, <6,3,u,4>
+ 2723467942U, // <u,4,6,4>: Cost 3 vsldoi8 <6,4,u,4>, <6,4,u,4>
+ 1192889654U, // <u,4,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2718823224U, // <u,4,6,6>: Cost 3 vsldoi8 <5,6,u,4>, <6,6,6,6>
+ 2718823246U, // <u,4,6,7>: Cost 3 vsldoi8 <5,6,u,4>, <6,7,0,1>
+ 1192889897U, // <u,4,6,u>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2640822374U, // <u,4,7,0>: Cost 3 vsldoi4 <3,u,4,7>, LHS
+ 2640823194U, // <u,4,7,1>: Cost 3 vsldoi4 <3,u,4,7>, <1,2,3,4>
+ 2728113373U, // <u,4,7,2>: Cost 3 vsldoi8 <7,2,u,4>, <7,2,u,4>
+ 2640825150U, // <u,4,7,3>: Cost 3 vsldoi4 <3,u,4,7>, <3,u,4,7>
+ 1235815632U, // <u,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235814094U, // <u,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 2730767905U, // <u,4,7,6>: Cost 3 vsldoi8 <7,6,u,4>, <7,6,u,4>
+ 2309556892U, // <u,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235814097U, // <u,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 1561116774U, // <u,4,u,0>: Cost 2 vsldoi4 <2,u,4,u>, LHS
+ 1618540334U, // <u,4,u,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1561118822U, // <u,4,u,2>: Cost 2 vsldoi4 <2,u,4,u>, <2,u,4,u>
+ 2692282300U, // <u,4,u,3>: Cost 3 vsldoi8 <1,2,u,4>, <u,3,0,1>
+ 229035318U, // <u,4,u,4>: Cost 1 vspltisw0 RHS
+ 120368438U, // <u,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 604859945U, // <u,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2309565084U, // <u,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 604859963U, // <u,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2690293760U, // <u,5,0,0>: Cost 3 vsldoi8 <0,u,u,5>, <0,0,0,0>
+ 1616552038U, // <u,5,0,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2640840434U, // <u,5,0,2>: Cost 3 vsldoi4 <3,u,5,0>, <2,3,u,5>
+ 2640841536U, // <u,5,0,3>: Cost 3 vsldoi4 <3,u,5,0>, <3,u,5,0>
+ 1613381970U, // <u,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2316135642U, // <u,5,0,5>: Cost 3 vmrglw <5,6,u,0>, <4,4,5,5>
+ 2289592834U, // <u,5,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,5,6>
+ 2664732324U, // <u,5,0,7>: Cost 3 vsldoi4 <7,u,5,0>, <7,u,5,0>
+ 1616552661U, // <u,5,0,u>: Cost 2 vsldoi8 <0,u,u,5>, <0,u,u,5>
+ 1573077094U, // <u,5,1,0>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 1237536282U, // <u,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2690294678U, // <u,5,1,2>: Cost 3 vsldoi8 <0,u,u,5>, <1,2,3,0>
+ 2646821014U, // <u,5,1,3>: Cost 3 vsldoi4 <4,u,5,1>, <3,0,1,2>
+ 1573080602U, // <u,5,1,4>: Cost 2 vsldoi4 <4,u,5,1>, <4,u,5,1>
+ 1189466116U, // <u,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1189466210U, // <u,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2646823930U, // <u,5,1,7>: Cost 3 vsldoi4 <4,u,5,1>, <7,0,1,2>
+ 1573082926U, // <u,5,1,u>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 2640855142U, // <u,5,2,0>: Cost 3 vsldoi4 <3,u,5,2>, LHS
+ 2697594448U, // <u,5,2,1>: Cost 3 vsldoi8 <2,1,u,5>, <2,1,u,5>
+ 2690295400U, // <u,5,2,2>: Cost 3 vsldoi8 <0,u,u,5>, <2,2,2,2>
+ 1625179890U, // <u,5,2,3>: Cost 2 vsldoi8 <2,3,u,5>, <2,3,u,5>
+ 2699585347U, // <u,5,2,4>: Cost 3 vsldoi8 <2,4,u,5>, <2,4,u,5>
+ 2781171471U, // <u,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2690295738U, // <u,5,2,6>: Cost 3 vsldoi8 <0,u,u,5>, <2,6,3,7>
+ 3775318070U, // <u,5,2,7>: Cost 4 vsldoi8 <2,7,u,5>, <2,7,u,5>
+ 1628498055U, // <u,5,2,u>: Cost 2 vsldoi8 <2,u,u,5>, <2,u,u,5>
+ 2287627234U, // <u,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1257016210U, // <u,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2646836942U, // <u,5,3,2>: Cost 3 vsldoi4 <4,u,5,3>, <2,3,4,5>
+ 2287625131U, // <u,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287627238U, // <u,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1257016538U, // <u,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1209240066U, // <u,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287625459U, // <u,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1209240068U, // <u,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 2640871526U, // <u,5,4,0>: Cost 3 vsldoi4 <3,u,5,4>, LHS
+ 2316168082U, // <u,5,4,1>: Cost 3 vmrglw <5,6,u,4>, <4,0,5,1>
+ 2640873202U, // <u,5,4,2>: Cost 3 vsldoi4 <3,u,5,4>, <2,3,u,5>
+ 2640874308U, // <u,5,4,3>: Cost 3 vsldoi4 <3,u,5,4>, <3,u,5,4>
+ 1637788917U, // <u,5,4,4>: Cost 2 vsldoi8 <4,4,u,5>, <4,4,u,5>
+ 1616555318U, // <u,5,4,5>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 2287638591U, // <u,5,4,6>: Cost 3 vmrglw <0,u,u,4>, <u,4,5,6>
+ 2664765096U, // <u,5,4,7>: Cost 3 vsldoi4 <7,u,5,4>, <7,u,5,4>
+ 1616555561U, // <u,5,4,u>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 1573109862U, // <u,5,5,0>: Cost 2 vsldoi4 <4,u,5,5>, LHS
+ 2646852404U, // <u,5,5,1>: Cost 3 vsldoi4 <4,u,5,5>, <1,1,1,1>
+ 2646853224U, // <u,5,5,2>: Cost 3 vsldoi4 <4,u,5,5>, <2,2,2,2>
+ 2287646618U, // <u,5,5,3>: Cost 3 vmrglw <0,u,u,5>, <u,2,5,3>
+ 1573113374U, // <u,5,5,4>: Cost 2 vsldoi4 <4,u,5,5>, <4,u,5,5>
+ 296144182U, // <u,5,5,5>: Cost 1 vspltisw1 RHS
+ 1192448098U, // <u,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2287646946U, // <u,5,5,7>: Cost 3 vmrglw <0,u,u,5>, <u,6,5,7>
+ 296144182U, // <u,5,5,u>: Cost 1 vspltisw1 RHS
+ 1567146086U, // <u,5,6,0>: Cost 2 vsldoi4 <3,u,5,6>, LHS
+ 2628945300U, // <u,5,6,1>: Cost 3 vsldoi4 <1,u,5,6>, <1,u,5,6>
+ 2634917997U, // <u,5,6,2>: Cost 3 vsldoi4 <2,u,5,6>, <2,u,5,6>
+ 1567148870U, // <u,5,6,3>: Cost 2 vsldoi4 <3,u,5,6>, <3,u,5,6>
+ 1567149366U, // <u,5,6,4>: Cost 2 vsldoi4 <3,u,5,6>, RHS
+ 2781171799U, // <u,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 1228950018U, // <u,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 2628952166U, // <u,5,7,0>: Cost 3 vsldoi4 <1,u,5,7>, LHS
+ 1235815314U, // <u,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309556734U, // <u,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309555115U, // <u,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2628955446U, // <u,5,7,4>: Cost 3 vsldoi4 <1,u,5,7>, RHS
+ 1235815642U, // <u,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235814914U, // <u,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309555443U, // <u,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235814916U, // <u,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 1567162470U, // <u,5,u,0>: Cost 2 vsldoi4 <3,u,5,u>, LHS
+ 1616557870U, // <u,5,u,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2690299781U, // <u,5,u,2>: Cost 3 vsldoi8 <0,u,u,5>, <u,2,3,0>
+ 1567165256U, // <u,5,u,3>: Cost 2 vsldoi4 <3,u,5,u>, <3,u,5,u>
+ 1567165750U, // <u,5,u,4>: Cost 2 vsldoi4 <3,u,5,u>, RHS
+ 296144182U, // <u,5,u,5>: Cost 1 vspltisw1 RHS
+ 1209281026U, // <u,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2705563648U, // <u,6,0,0>: Cost 3 vsldoi8 <3,4,u,6>, <0,0,0,0>
+ 1631821926U, // <u,6,0,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 2262462970U, // <u,6,0,2>: Cost 3 vmrghw <u,0,1,2>, <6,2,7,3>
+ 2646886941U, // <u,6,0,3>: Cost 3 vsldoi4 <4,u,6,0>, <3,4,u,6>
+ 2705563986U, // <u,6,0,4>: Cost 3 vsldoi8 <3,4,u,6>, <0,4,1,5>
+ 2316062652U, // <u,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316137272U, // <u,6,0,6>: Cost 3 vmrglw <5,6,u,0>, <6,6,6,6>
+ 1215851830U, // <u,6,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 1215851831U, // <u,6,0,u>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 2634948710U, // <u,6,1,0>: Cost 3 vsldoi4 <2,u,6,1>, LHS
+ 2705564468U, // <u,6,1,1>: Cost 3 vsldoi8 <3,4,u,6>, <1,1,1,1>
+ 1189466618U, // <u,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2263208498U, // <u,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2693620843U, // <u,6,1,4>: Cost 3 vsldoi8 <1,4,u,6>, <1,4,u,6>
+ 2652868860U, // <u,6,1,5>: Cost 3 vsldoi4 <5,u,6,1>, <5,u,6,1>
+ 1189466936U, // <u,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1213869366U, // <u,6,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 1213869367U, // <u,6,1,u>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 2658844774U, // <u,6,2,0>: Cost 3 vsldoi4 <6,u,6,2>, LHS
+ 3771344465U, // <u,6,2,1>: Cost 4 vsldoi8 <2,1,u,6>, <2,1,u,6>
+ 1178554874U, // <u,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2698929907U, // <u,6,2,3>: Cost 3 vsldoi8 <2,3,u,6>, <2,3,u,6>
+ 2699593540U, // <u,6,2,4>: Cost 3 vsldoi8 <2,4,u,6>, <2,4,u,6>
+ 2700257173U, // <u,6,2,5>: Cost 3 vsldoi8 <2,5,u,6>, <2,5,u,6>
+ 2705565626U, // <u,6,2,6>: Cost 3 vsldoi8 <3,4,u,6>, <2,6,3,7>
+ 1226485046U, // <u,6,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 1226485047U, // <u,6,2,u>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 2705565846U, // <u,6,3,0>: Cost 3 vsldoi8 <3,4,u,6>, <3,0,1,2>
+ 2330756585U, // <u,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2330756829U, // <u,6,3,2>: Cost 3 vmrglw LHS, <2,3,6,2>
+ 2282981734U, // <u,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 1631824413U, // <u,6,3,4>: Cost 2 vsldoi8 <3,4,u,6>, <3,4,u,6>
+ 2652885246U, // <u,6,3,5>: Cost 3 vsldoi4 <5,u,6,3>, <5,u,6,3>
+ 1257018168U, // <u,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135499062U, // <u,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135499063U, // <u,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 2646917222U, // <u,6,4,0>: Cost 3 vsldoi4 <4,u,6,4>, LHS
+ 2217365931U, // <u,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2790167156U, // <u,6,4,2>: Cost 3 vsldoi12 <6,4,2,u>, <6,4,2,u>
+ 2646919709U, // <u,6,4,3>: Cost 3 vsldoi4 <4,u,6,4>, <3,4,u,6>
+ 2711538934U, // <u,6,4,4>: Cost 3 vsldoi8 <4,4,u,6>, <4,4,u,6>
+ 1631825206U, // <u,6,4,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 2316170040U, // <u,6,4,6>: Cost 3 vmrglw <5,6,u,4>, <6,6,6,6>
+ 1215884598U, // <u,6,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 1215884599U, // <u,6,4,u>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 2634981478U, // <u,6,5,0>: Cost 3 vsldoi4 <2,u,6,5>, LHS
+ 2266190247U, // <u,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1192448506U, // <u,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2266190386U, // <u,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2634984758U, // <u,6,5,4>: Cost 3 vsldoi4 <2,u,6,5>, RHS
+ 2652901632U, // <u,6,5,5>: Cost 3 vsldoi4 <5,u,6,5>, <5,u,6,5>
+ 1192448824U, // <u,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1213902134U, // <u,6,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1213902135U, // <u,6,5,u>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1583808614U, // <u,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <u,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2718839290U, // <u,6,6,2>: Cost 3 vsldoi8 <5,6,u,6>, <6,2,7,3>
+ 2670823965U, // <u,6,6,3>: Cost 3 vsldoi4 <u,u,6,6>, <3,4,u,6>
+ 1583811894U, // <u,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2724147961U, // <u,6,6,5>: Cost 3 vsldoi8 <6,5,u,6>, <6,5,u,6>
+ 363253046U, // <u,6,6,6>: Cost 1 vspltisw2 RHS
+ 1229172022U, // <u,6,6,7>: Cost 2 vmrglw <3,4,u,6>, RHS
+ 363253046U, // <u,6,6,u>: Cost 1 vspltisw2 RHS
+ 499458150U, // <u,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1573200692U, // <u,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1573201512U, // <u,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573202070U, // <u,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499461673U, // <u,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1573203972U, // <u,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1235817272U, // <u,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 162073910U, // <u,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 162073911U, // <u,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 499466342U, // <u,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631827758U, // <u,6,u,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 1573209704U, // <u,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573210262U, // <u,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499469866U, // <u,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631828122U, // <u,6,u,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 363253046U, // <u,6,u,6>: Cost 1 vspltisw2 RHS
+ 135540022U, // <u,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135540023U, // <u,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 1638465536U, // <u,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564723814U, // <u,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712207533U, // <u,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712207612U, // <u,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638465874U, // <u,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1579192580U, // <u,7,0,5>: Cost 2 vsldoi4 <5,u,7,0>, <5,u,7,0>
+ 2712207862U, // <u,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2316137282U, // <u,7,0,7>: Cost 3 vmrglw <5,6,u,0>, <6,6,7,7>
+ 564724381U, // <u,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 1189467130U, // <u,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 1638466356U, // <u,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638466454U, // <u,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 2311500282U, // <u,7,1,3>: Cost 3 vmrglw <4,u,u,1>, <6,2,7,3>
+ 1189467494U, // <u,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2712208495U, // <u,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2694956302U, // <u,7,1,6>: Cost 3 vsldoi8 <1,6,u,7>, <1,6,u,7>
+ 1189467756U, // <u,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1638466940U, // <u,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712208829U, // <u,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712208927U, // <u,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638467176U, // <u,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638467238U, // <u,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712209165U, // <u,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712209256U, // <u,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1627187175U, // <u,7,2,6>: Cost 2 vsldoi8 <2,6,u,7>, <2,6,u,7>
+ 2324116290U, // <u,7,2,7>: Cost 3 vmrglw <7,0,u,2>, <6,6,7,7>
+ 1628514441U, // <u,7,2,u>: Cost 2 vsldoi8 <2,u,u,7>, <2,u,u,7>
+ 1638467734U, // <u,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712209638U, // <u,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2700929387U, // <u,7,3,2>: Cost 3 vsldoi8 <2,6,u,7>, <3,2,6,u>
+ 1638467996U, // <u,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638468098U, // <u,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712210002U, // <u,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 1585189856U, // <u,7,3,6>: Cost 2 vsldoi4 <6,u,7,3>, <6,u,7,3>
+ 1257018178U, // <u,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1638468382U, // <u,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638468498U, // <u,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712210378U, // <u,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712210485U, // <u,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712210564U, // <u,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638468816U, // <u,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564727112U, // <u,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712210809U, // <u,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2712210888U, // <u,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,0>
+ 564727337U, // <u,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 1192449018U, // <u,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2714201743U, // <u,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712211198U, // <u,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2311533050U, // <u,7,5,3>: Cost 3 vmrglw <4,u,u,5>, <6,2,7,3>
+ 1192449382U, // <u,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 1638469636U, // <u,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638469730U, // <u,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 1192449644U, // <u,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1638469892U, // <u,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712211745U, // <u,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712211879U, // <u,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638470138U, // <u,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712212018U, // <u,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712212109U, // <u,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712212203U, // <u,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638470456U, // <u,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638470478U, // <u,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638470559U, // <u,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,0,1>
+ 1235816546U, // <u,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309558371U, // <u,7,7,1>: Cost 3 vmrglw RHS, <5,6,7,1>
+ 2641045434U, // <u,7,7,2>: Cost 3 vsldoi4 <3,u,7,7>, <2,6,3,7>
+ 1235816954U, // <u,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1235816550U, // <u,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309558375U, // <u,7,7,5>: Cost 3 vmrglw RHS, <5,6,7,5>
+ 1585222628U, // <u,7,7,6>: Cost 2 vsldoi4 <6,u,7,7>, <6,u,7,7>
+ 430361910U, // <u,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <u,7,7,u>: Cost 1 vspltisw3 RHS
+ 1638471379U, // <u,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564729646U, // <u,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638471557U, // <u,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638471612U, // <u,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638471743U, // <u,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564730010U, // <u,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638471888U, // <u,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 430361910U, // <u,7,u,7>: Cost 1 vspltisw3 RHS
+ 564730213U, // <u,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 202162278U, // <u,u,0,0>: Cost 1 vspltisw0 LHS
+ 538189985U, // <u,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685673645U, // <u,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 1215848604U, // <u,u,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 1611931986U, // <u,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 1579266317U, // <u,u,0,5>: Cost 2 vsldoi4 <5,u,u,0>, <5,u,u,0>
+ 2289592861U, // <u,u,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,u,6>
+ 1215851848U, // <u,u,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 538190493U, // <u,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1549411025U, // <u,u,1,0>: Cost 2 vsldoi4 <0,u,u,1>, <0,u,u,1>
+ 115726126U, // <u,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 604862254U, // <u,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 1213866140U, // <u,u,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1549413686U, // <u,u,1,4>: Cost 2 vsldoi4 <0,u,u,1>, RHS
+ 115726490U, // <u,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1585247207U, // <u,u,1,6>: Cost 2 vsldoi4 <6,u,u,1>, <6,u,u,1>
+ 1213869384U, // <u,u,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 604862308U, // <u,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 1567334502U, // <u,u,2,0>: Cost 2 vsldoi4 <3,u,u,2>, LHS
+ 1190180654U, // <u,u,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 336380006U, // <u,u,2,2>: Cost 1 vspltisw2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1567337782U, // <u,u,2,4>: Cost 2 vsldoi4 <3,u,u,2>, RHS
+ 1190181018U, // <u,u,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 1611933626U, // <u,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1226485064U, // <u,u,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 475685587U, // <u,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1209239278U, // <u,u,3,1>: Cost 2 vmrglw LHS, <2,3,u,1>
+ 1209239765U, // <u,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135495836U, // <u,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 475688246U, // <u,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1209239282U, // <u,u,3,5>: Cost 2 vmrglw LHS, <2,3,u,5>
+ 1209240093U, // <u,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135499080U, // <u,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135495841U, // <u,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1555406950U, // <u,u,4,0>: Cost 2 vsldoi4 <1,u,u,4>, LHS
+ 1555408301U, // <u,u,4,1>: Cost 2 vsldoi4 <1,u,u,4>, <1,u,u,4>
+ 2289625301U, // <u,u,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,u,2>
+ 1215881372U, // <u,u,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 229035318U, // <u,u,4,4>: Cost 1 vspltisw0 RHS
+ 538193206U, // <u,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2289625629U, // <u,u,4,6>: Cost 3 vmrglw <1,2,u,4>, <3,4,u,6>
+ 1215884616U, // <u,u,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 538193449U, // <u,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1549443797U, // <u,u,5,0>: Cost 2 vsldoi4 <0,u,u,5>, <0,u,u,5>
+ 118708014U, // <u,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1561389191U, // <u,u,5,2>: Cost 2 vsldoi4 <2,u,u,5>, <2,u,u,5>
+ 1213898908U, // <u,u,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 1549446454U, // <u,u,5,4>: Cost 2 vsldoi4 <0,u,u,5>, RHS
+ 118708378U, // <u,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 604862618U, // <u,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 1213902152U, // <u,u,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 604862636U, // <u,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 1567367270U, // <u,u,6,0>: Cost 2 vsldoi4 <3,u,u,6>, LHS
+ 1192892206U, // <u,u,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1638478330U, // <u,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1679046864U, // <u,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 1567370550U, // <u,u,6,4>: Cost 2 vsldoi4 <3,u,u,6>, RHS
+ 1192892570U, // <u,u,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 363253046U, // <u,u,6,6>: Cost 1 vspltisw2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 499605606U, // <u,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1235812425U, // <u,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1561405577U, // <u,u,7,2>: Cost 2 vsldoi4 <2,u,u,7>, <2,u,u,7>
+ 162070684U, // <u,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 499609147U, // <u,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1235812753U, // <u,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235814941U, // <u,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 162073928U, // <u,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 162070689U, // <u,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 475726552U, // <u,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 538195758U, // <u,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 604862821U, // <u,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 475729206U, // <u,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 538196122U, // <u,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 604862861U, // <u,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
diff --git a/lib/Target/PowerPC/PPCPredicates.cpp b/lib/Target/PowerPC/PPCPredicates.cpp
new file mode 100644
index 000000000000..08a281259e1f
--- /dev/null
+++ b/lib/Target/PowerPC/PPCPredicates.cpp
@@ -0,0 +1,30 @@
+//===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCPredicates.h"
+#include <cassert>
+using namespace llvm;
+
+PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
+ switch (Opcode) {
+ default: assert(0 && "Unknown PPC branch opcode!");
+ case PPC::PRED_EQ: return PPC::PRED_NE;
+ case PPC::PRED_NE: return PPC::PRED_EQ;
+ case PPC::PRED_LT: return PPC::PRED_GE;
+ case PPC::PRED_GE: return PPC::PRED_LT;
+ case PPC::PRED_GT: return PPC::PRED_LE;
+ case PPC::PRED_LE: return PPC::PRED_GT;
+ case PPC::PRED_NU: return PPC::PRED_UN;
+ case PPC::PRED_UN: return PPC::PRED_NU;
+ }
+}
diff --git a/lib/Target/PowerPC/PPCPredicates.h b/lib/Target/PowerPC/PPCPredicates.h
new file mode 100644
index 000000000000..b2c831579f79
--- /dev/null
+++ b/lib/Target/PowerPC/PPCPredicates.h
@@ -0,0 +1,39 @@
+//===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
+#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+
+#include "PPC.h"
+
+namespace llvm {
+namespace PPC {
+ /// Predicate - These are "(BI << 5) | BO" for various predicates.
+ enum Predicate {
+ PRED_ALWAYS = (0 << 5) | 20,
+ PRED_LT = (0 << 5) | 12,
+ PRED_LE = (1 << 5) | 4,
+ PRED_EQ = (2 << 5) | 12,
+ PRED_GE = (0 << 5) | 4,
+ PRED_GT = (1 << 5) | 12,
+ PRED_NE = (2 << 5) | 4,
+ PRED_UN = (3 << 5) | 12,
+ PRED_NU = (3 << 5) | 4
+ };
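+
+  // A worked example (illustrative): PRED_EQ is (2 << 5) | 12 = 76, i.e.
+  // BI = 2 (the EQ bit of CR0) with BO = 12 ("branch if the CR bit is set").
+  // Its inverse, PRED_NE, keeps BI = 2 but uses BO = 4 ("branch if the CR bit
+  // is clear"), which is what InvertPredicate below returns.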
+
+ /// Invert the specified predicate. != -> ==, < -> >=.
+ Predicate InvertPredicate(Predicate Opcode);
+}
+}
+
+#endif
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
new file mode 100644
index 000000000000..5d5beebda705
--- /dev/null
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -0,0 +1,1446 @@
+//===- PPCRegisterInfo.cpp - PowerPC Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCRegisterInfo.h"
+#include "PPCFrameInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+using namespace llvm;
+
+// FIXME This disables some code that aligns the stack to a boundary
+// bigger than the default (16 bytes on Darwin) when there is a stack local
+// of greater alignment. This does not currently work, because the delta
+// between old and new stack pointers is added to offsets that reference
+// incoming parameters after the prolog is generated, and the code that
+// does that doesn't handle a variable delta. You don't want to do that
+// anyway; a better approach is to reserve another register that retains
+// the incoming stack pointer, and reference parameters relative to that.
+#define ALIGN_STACK 0
+
+// FIXME (64-bit): Eventually enable by default.
+cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
+ cl::init(false),
+ cl::desc("Enable PPC32 register scavenger"),
+ cl::Hidden);
+cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
+ cl::init(false),
+ cl::desc("Enable PPC64 register scavenger"),
+ cl::Hidden);
+#define EnableRegisterScavenging \
+ ((EnablePPC32RS && !Subtarget.isPPC64()) || \
+ (EnablePPC64RS && Subtarget.isPPC64()))
+
+// FIXME (64-bit): Should be inlined.
+bool
+PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
+ return EnableRegisterScavenging;
+}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// PPC::F14, return the number that it corresponds to (e.g. 14).
+unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace PPC;
+ switch (RegEnum) {
+ case 0: return 0;
+ case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0;
+ case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1;
+ case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2;
+ case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3;
+ case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4;
+ case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5;
+ case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6;
+ case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7;
+ case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8;
+ case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9;
+ case R10: case X10: case F10: case V10: case CR2EQ: return 10;
+ case R11: case X11: case F11: case V11: case CR2UN: return 11;
+ case R12: case X12: case F12: case V12: case CR3LT: return 12;
+ case R13: case X13: case F13: case V13: case CR3GT: return 13;
+ case R14: case X14: case F14: case V14: case CR3EQ: return 14;
+ case R15: case X15: case F15: case V15: case CR3UN: return 15;
+ case R16: case X16: case F16: case V16: case CR4LT: return 16;
+ case R17: case X17: case F17: case V17: case CR4GT: return 17;
+ case R18: case X18: case F18: case V18: case CR4EQ: return 18;
+ case R19: case X19: case F19: case V19: case CR4UN: return 19;
+ case R20: case X20: case F20: case V20: case CR5LT: return 20;
+ case R21: case X21: case F21: case V21: case CR5GT: return 21;
+ case R22: case X22: case F22: case V22: case CR5EQ: return 22;
+ case R23: case X23: case F23: case V23: case CR5UN: return 23;
+ case R24: case X24: case F24: case V24: case CR6LT: return 24;
+ case R25: case X25: case F25: case V25: case CR6GT: return 25;
+ case R26: case X26: case F26: case V26: case CR6EQ: return 26;
+ case R27: case X27: case F27: case V27: case CR6UN: return 27;
+ case R28: case X28: case F28: case V28: case CR7LT: return 28;
+ case R29: case X29: case F29: case V29: case CR7GT: return 29;
+ case R30: case X30: case F30: case V30: case CR7EQ: return 30;
+ case R31: case X31: case F31: case V31: case CR7UN: return 31;
+ default:
+ cerr << "Unhandled reg in PPCRegisterInfo::getRegisterNumbering!\n";
+ abort();
+ }
+}
+
+PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
+ const TargetInstrInfo &tii)
+ : PPCGenRegisterInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ Subtarget(ST), TII(tii) {
+ ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
+ ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
+ ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
+ ImmToIdxMap[PPC::LWZ] = PPC::LWZX; ImmToIdxMap[PPC::LWA] = PPC::LWAX;
+ ImmToIdxMap[PPC::LFS] = PPC::LFSX; ImmToIdxMap[PPC::LFD] = PPC::LFDX;
+ ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX;
+ ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX;
+ ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+
+ // 64-bit
+ ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8;
+ ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8;
+ ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
+ ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
+ ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
+}
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass *PPCRegisterInfo::getPointerRegClass() const {
+ if (Subtarget.isPPC64())
+ return &PPC::G8RCRegClass;
+ else
+ return &PPC::GPRCRegClass;
+}
+
+const unsigned*
+PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ // 32-bit Darwin calling convention.
+ static const unsigned Macho32_CalleeSavedRegs[] = {
+ PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ PPC::LR, 0
+ };
+
+ static const unsigned ELF32_CalleeSavedRegs[] = {
+ PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31,
+
+ PPC::F9,
+ PPC::F10, PPC::F11, PPC::F12, PPC::F13,
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ PPC::LR, 0
+ };
+ // 64-bit Darwin calling convention.
+ static const unsigned Macho64_CalleeSavedRegs[] = {
+ PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ PPC::LR8, 0
+ };
+
+ if (Subtarget.isMachoABI())
+ return Subtarget.isPPC64() ? Macho64_CalleeSavedRegs :
+ Macho32_CalleeSavedRegs;
+
+ // ELF 32.
+ return ELF32_CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ // 32-bit Macho calling convention.
+ static const TargetRegisterClass * const Macho32_CalleeSavedRegClasses[] = {
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,
+
+ &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
+
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+
+ &PPC::GPRCRegClass, 0
+ };
+
+ static const TargetRegisterClass * const ELF32_CalleeSavedRegClasses[] = {
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+ &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+
+ &PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,
+
+ &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
+
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+
+ &PPC::GPRCRegClass, 0
+ };
+
+ // 64-bit Macho calling convention.
+ static const TargetRegisterClass * const Macho64_CalleeSavedRegClasses[] = {
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,
+
+ &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
+
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+
+ &PPC::G8RCRegClass, 0
+ };
+
+ if (Subtarget.isMachoABI())
+ return Subtarget.isPPC64() ? Macho64_CalleeSavedRegClasses :
+ Macho32_CalleeSavedRegClasses;
+
+ // ELF 32.
+ return ELF32_CalleeSavedRegClasses;
+}
+
+// needsFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+static bool needsFP(const MachineFunction &MF) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects() ||
+ (PerformTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+}
+
+static bool spillsCR(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isCRSpilled();
+}
+
+BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(PPC::R0);
+ Reserved.set(PPC::R1);
+ Reserved.set(PPC::LR);
+ Reserved.set(PPC::LR8);
+ Reserved.set(PPC::RM);
+
+ // In Linux, r2 is reserved for the OS.
+ if (!Subtarget.isDarwin())
+ Reserved.set(PPC::R2);
+
+ // On PPC64, r13 is the thread pointer. Never allocate this register. Note
+ // that this is overly conservative, as it also prevents allocation of R31 when
+ // the FP is not needed.
+ if (Subtarget.isPPC64()) {
+ Reserved.set(PPC::R13);
+ Reserved.set(PPC::R31);
+
+ if (!EnableRegisterScavenging)
+ Reserved.set(PPC::R0); // FIXME (64-bit): Remove
+
+ Reserved.set(PPC::X0);
+ Reserved.set(PPC::X1);
+ Reserved.set(PPC::X13);
+ Reserved.set(PPC::X31);
+ }
+
+ if (needsFP(MF))
+ Reserved.set(PPC::R31);
+
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register. This is true if the function needs a frame pointer and has
+// a non-zero stack size.
+bool PPCRegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getStackSize() && needsFP(MF);
+}
+
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+ const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+ // We need a save/restore of LR if there is any def of LR (which is
+ // defined by calls, including the PIC setup sequence), or if there is
+ // some use of the LR stack slot (e.g. for builtin_return_address).
+ // (LR comes in 32 and 64 bit versions.)
+ MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+ return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
+
+
+void PPCRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (PerformTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually subtract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+
+ if (isInt16(CalleeAmt)) {
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg).
+ addImm(CalleeAmt);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(StackReg)
+ .addReg(TmpReg);
+ }
+ }
+ }
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static
+unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
+ const TargetRegisterClass *RC, int SPAdj) {
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC, true);
+ // FIXME: move ARM callee-saved reg scan to target independent code, then
+ // search for already spilled CS register here.
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ return Reg;
+}
+
+/// lowerDynamicAlloc - Generate the code for allocating an object in the
+/// current frame. The sequence of code will be in the general form
+///
+/// addi R0, SP, \#frameSize ; get the address of the previous frame
+/// stwux R0, SP, Rnegsize ; add and update the SP with the negated size
+/// addi Rnew, SP, \#maxCallFrameSize ; get the top of the allocation
+///
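+/// As an illustrative example (not emitted verbatim), with frameSize = 64 and
+/// maxCallFrameSize = 32 on PPC32 this would look roughly like:
+///
+///   addi r0, r1, 64     ; r0 = address of the previous frame
+///   stwux r0, r1, rNeg  ; store the back link at the new SP, SP += rNeg
+///   addi rNew, r1, 32   ; rNew = start of the newly allocated space
+///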
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Determine whether 64-bit pointers are used.
+ bool LP64 = Subtarget.isPPC64();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Get the maximum call stack size.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ // Get the total frame size.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get stack alignments.
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert(MaxAlign <= TargetAlign &&
+ "Dynamic alloca with large aligns not supported");
+
+ // Determine the previous frame's address. If FrameSize can't be
+ // represented as 16 bits or we need special alignment, then we load the
+ // previous frame's address from 0(SP). Why not do an addis of the hi?
+ // Because R0 is our only safe tmp register and addi/addis treat R0 as zero.
+ // Constructing the constant and adding would take 3 instructions.
+ // Fortunately, a frame greater than 32K is rare.
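+ // (E.g., "addi rD, r0, imm" is really "li rD, imm": r0 in the rA slot of
+ // addi/addis reads as the constant zero, so it cannot serve as a base here.)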
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = LP64 ? G8RC : GPRC;
+
+ // FIXME (64-bit): Use "findScratchRegister"
+ unsigned Reg;
+ if (EnableRegisterScavenging)
+ Reg = findScratchRegister(II, RS, RC, SPAdj);
+ else
+ Reg = PPC::R0;
+
+ if (MaxAlign < TargetAlign && isInt16(FrameSize)) {
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
+ .addReg(PPC::R31)
+ .addImm(FrameSize);
+ } else if (LP64) {
+ if (EnableRegisterScavenging) // FIXME (64-bit): Use "true" part.
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
+ else
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0)
+ .addImm(0)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
+ .addImm(0)
+ .addReg(PPC::R1);
+ }
+
+ // Grow the stack and update the stack pointer link, then determine the
+ // address of new allocated space.
+ if (LP64) {
+ if (EnableRegisterScavenging) // FIXME (64-bit): Use "true" part.
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
+ else
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X0, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
+
+ if (!MI.getOperand(1).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize);
+ else
+ // Implicitly kill the register.
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize)
+ .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::STWUX))
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::R1)
+ .addReg(MI.getOperand(1).getReg());
+
+ if (!MI.getOperand(1).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize);
+ else
+ // Implicitly kill the register.
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize)
+ .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ }
+
+ // Discard the DYNALLOC instruction.
+ MBB.erase(II);
+}
+
+/// lowerCRSpilling - Generate the code for spilling a CR register. Instead of
+/// reserving a whole register (R0), we scrounge for one here. This generates
+/// code like this:
+///
+/// mfcr rA ; Move the conditional register into GPR rA.
+/// rlwinm rA, rA, SB, 0, 31 ; Shift the bits left so they are in CR0's slot.
+/// stw rA, FI ; Store rA to the frame.
+///
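+/// For example (illustrative), when spilling CR2 the register number is 2, so
+/// SB is 2 * 4 = 8 and the sequence becomes "mfcr rA; rlwinm rA, rA, 8, 0, 31;
+/// stw rA, FI", rotating CR2's four bits into CR0's slot before the store.
+///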
+void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
+
+ // We need to store the CR in the low 4 bits of the saved value. First, issue
+ // an MFCR to save all of the CRBits. Add an implicit kill of the CR.
+ if (!MI.getOperand(0).isKill())
+ BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg);
+ else
+ // Implicitly kill the CR register.
+ BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg)
+ .addReg(MI.getOperand(0).getReg(), RegState::ImplicitKill);
+
+ // If the saved register wasn't CR0, shift the bits left so that they are in
+ // CR0's slot.
+ unsigned SrcReg = MI.getOperand(0).getReg();
+ if (SrcReg != PPC::CR0)
+ // rlwinm rA, rA, ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(PPCRegisterInfo::getRegisterNumbering(SrcReg) * 4)
+ .addImm(0)
+ .addImm(31);
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+ .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Find out which operand is the frame index.
+ unsigned FIOperandNo = 0;
+ while (!MI.getOperand(FIOperandNo).isFI()) {
+ ++FIOperandNo;
+ assert(FIOperandNo != MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+ // Take into account whether it's an add or mem instruction
+ unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2;
+ if (MI.getOpcode() == TargetInstrInfo::INLINEASM)
+ OffsetOperandNo = FIOperandNo-1;
+
+ // Get the frame index.
+ int FrameIndex = MI.getOperand(FIOperandNo).getIndex();
+
+ // Get the frame pointer save index. Users of this index are primarily
+ // DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+ // Get the instruction opcode.
+ unsigned OpC = MI.getOpcode();
+
+ // Special case for dynamic alloca.
+ if (FPSI && FrameIndex == FPSI &&
+ (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
+ lowerDynamicAlloc(II, SPAdj, RS);
+ return;
+ }
+
+ // Special case for pseudo-op SPILL_CR.
+ if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex, SPAdj, RS);
+ return;
+ }
+
+ // Replace the FrameIndex operand with the base register, GPR1 (SP) or GPR31 (FP).
+ MI.getOperand(FIOperandNo).ChangeToRegister(hasFP(MF) ? PPC::R31 : PPC::R1,
+ false);
+
+ // Figure out if the offset in the instruction is shifted right two bits. This
+ // is true for instructions like "STD", which the machine implicitly adds two
+ // low zeros to.
+ bool isIXAddr = false;
+ switch (OpC) {
+ case PPC::LWA:
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::STD_32:
+ isIXAddr = true;
+ break;
+ }
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+ if (!isIXAddr)
+ Offset += MI.getOperand(OffsetOperandNo).getImm();
+ else
+ Offset += MI.getOperand(OffsetOperandNo).getImm() << 2;
+
+ // If we're not using a Frame Pointer that has been set to the value of the
+ // SP before having the stack size subtracted from it, then add the stack size
+ // to Offset to get the correct offset.
+ Offset += MFI->getStackSize();
+
+ // If we can, encode the offset directly into the instruction. If this is a
+ // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If
+ // this is a PPC64 "ix" instruction, only a 16-bit value with the low two bits
+ // clear can be encoded. This is extremely uncommon, because normally you
+ // only "std" to a stack slot that is at least 4-byte aligned, but it can
+ // happen in invalid code.
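+ // For instance (illustrative), an "std" to offset 40 encodes 40 >> 2 = 10 in
+ // its 14-bit DS field, while an offset of 42 has its low bits set and must
+ // fall through to the indexed-form rewrite below.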
+ if (isInt16(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
+ if (isIXAddr)
+ Offset >>= 2; // The actual encoded value has the low two bits zero.
+ MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // The offset doesn't fit into the instruction's immediate field, so scavenge
+ // a register to build the offset in.
+ // FIXME: figure out what SPAdj is doing here.
+
+ // FIXME (64-bit): Use "findScratchRegister".
+ unsigned SReg;
+ if (EnableRegisterScavenging)
+ SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj);
+ else
+ SReg = PPC::R0;
+
+ // Insert a set of rA with the full offset value before the ld, st, or add
+ BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg)
+ .addImm(Offset >> 16);
+ BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg)
+ .addReg(SReg, RegState::Kill)
+ .addImm(Offset);
+
+ // Convert into indexed form of the instruction:
+ //
+ // sth 0:rA, 1:imm 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0
+ // addi 0:rA, 1:rB, 2:imm ==> add 0:rA, 1:rB, 2:r0
+ unsigned OperandBase;
+
+ if (OpC != TargetInstrInfo::INLINEASM) {
+ assert(ImmToIdxMap.count(OpC) &&
+ "No indexed form of load or store available!");
+ unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
+ MI.setDesc(TII.get(NewOpcode));
+ OperandBase = 1;
+ } else {
+ OperandBase = OffsetOperandNo;
+ }
+
+ unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
+ MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
+ MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
+}
+
+/// VRRegNo - Map from a numbered VR register to its enum value.
+///
+static const unsigned short VRRegNo[] = {
+ PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
+ PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+/// RemoveVRSaveCode - We have found that this function does not need any code
+/// to manipulate the VRSAVE register, even though it uses vector registers.
+/// This can happen when the only registers used are known to be live in or out
+/// of the function. Remove all of the VRSAVE related code from the function.
+static void RemoveVRSaveCode(MachineInstr *MI) {
+ MachineBasicBlock *Entry = MI->getParent();
+ MachineFunction *MF = Entry->getParent();
+
+ // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
+ MachineBasicBlock::iterator MBBI = MI;
+ ++MBBI;
+ assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
+ MBBI->eraseFromParent();
+
+ bool RemovedAllMTVRSAVEs = true;
+ // See if we can find and remove the MTVRSAVE instruction from all of the
+ // epilog blocks.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+ // If the last instruction is a return, this is an epilog block; look for
+ // the MTVRSAVE instruction in it and remove it.
+ if (!I->empty() && I->back().getDesc().isReturn()) {
+ bool FoundIt = false;
+ for (MBBI = I->end(); MBBI != I->begin(); ) {
+ --MBBI;
+ if (MBBI->getOpcode() == PPC::MTVRSAVE) {
+ MBBI->eraseFromParent(); // remove it.
+ FoundIt = true;
+ break;
+ }
+ }
+ RemovedAllMTVRSAVEs &= FoundIt;
+ }
+ }
+
+ // If we found and removed all MTVRSAVE instructions, remove the read of
+ // VRSAVE as well.
+ if (RemovedAllMTVRSAVEs) {
+ MBBI = MI;
+ assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
+ --MBBI;
+ assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
+ MBBI->eraseFromParent();
+ }
+
+ // Finally, nuke the UPDATE_VRSAVE.
+ MI->eraseFromParent();
+}
+
+// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
+// instruction selector. Based on the vector registers that have been used,
+// transform this into the appropriate ORI instruction.
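+//
+// For example (illustrative): if only V20-V31 are used, UsedRegMask is
+// 0x00000FFF and a single "ori DstReg, SrcReg, 0xFFF" suffices; a mask that
+// spans both 16-bit halves needs an ORIS for the high half followed by an ORI.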
+static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineFunction *MF = MI->getParent()->getParent();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned UsedRegMask = 0;
+ for (unsigned i = 0; i != 32; ++i)
+ if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
+ UsedRegMask |= 1 << (31-i);
+
+ // Live in and live out values already must be in the mask, so don't bother
+ // marking them.
+ for (MachineRegisterInfo::livein_iterator
+ I = MF->getRegInfo().livein_begin(),
+ E = MF->getRegInfo().livein_end(); I != E; ++I) {
+ unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first);
+ if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I);
+ if (VRRegNo[RegNo] == *I) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+
+ // If no vector registers are used, the VRSAVE manipulation is unnecessary.
+ if (UsedRegMask == 0) {
+ // Remove all VRSAVE code.
+ RemoveVRSaveCode(MI);
+ return;
+ }
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask);
+ } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+ } else {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(DstReg, RegState::Kill)
+ .addImm(UsedRegMask & 0xFFFF);
+ }
+
+ // Remove the old UPDATE_VRSAVE instruction.
+ MI->eraseFromParent();
+}
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
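+///
+/// Illustrative arithmetic: with TargetAlign = 16, AlignMask is 15, so a raw
+/// frame of 100 bytes plus a 64-byte maximum call frame rounds up to
+/// (164 + 15) & ~15 = 176 bytes.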
+void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+
+ // If we are a leaf function that uses no more than 224 bytes of stack space,
+ // has no calls or dynamic allocas, and does not need a frame pointer, then we
+ // do not need to adjust the stack pointer (we fit in the Red Zone).
+ if (!DisableRedZone &&
+ FrameSize <= 224 && // Fits in red zone.
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->hasCalls() && // No calls.
+ (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
+ // No need for frame
+ MFI->setStackSize(0);
+ return;
+ }
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // Maximum call frame needs to be at least big enough for linkage and 8 args.
+ unsigned minCallFrameSize =
+ PPCFrameInfo::getMinCallFrameSize(Subtarget.isPPC64(),
+ Subtarget.isMachoABI());
+ maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+void
+PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Save and clear the LR state.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned LR = getRARegister();
+ FI->setMustSaveLR(MustSaveLR(MF, LR));
+ MF.getRegInfo().setPhysRegUnused(LR);
+
+ // Save R31 if necessary
+ int FPSI = FI->getFramePointerSaveIndex();
+ bool IsPPC64 = Subtarget.isPPC64();
+ bool IsELF32_ABI = Subtarget.isELF32_ABI();
+ bool IsMachoABI = Subtarget.isMachoABI();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI && (NoFramePointerElim || MFI->hasVarSizedObjects()) &&
+ IsELF32_ABI) {
+ // Find out what the fixed offset of the frame pointer save area is.
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64,
+ IsMachoABI);
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+
+ // Reserve stack space to move the linkage area to in case of a tail call.
+ int TCSPDelta = 0;
+ if (PerformTailCallOpt && (TCSPDelta=FI->getTailCallSPDelta()) < 0) {
+ int AddFPOffsetAmount = IsELF32_ABI ? -4 : 0;
+ MF.getFrameInfo()->CreateFixedObject( -1 * TCSPDelta,
+ AddFPOffsetAmount + TCSPDelta);
+ }
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+ // FIXME: this doesn't actually check stack size, so is a bit pessimistic
+ // FIXME: doesn't detect whether or not we need to spill vXX, which requires
+ // r0 for now.
+
+ if (EnableRegisterScavenging) // FIXME (64-bit): Enable.
+ if (needsFP(MF) || spillsCR(MF)) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = IsPPC64 ? G8RC : GPRC;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment()));
+ }
+}
+
+void
+PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
+ !MF.getFunction()->doesNotThrow() ||
+ UnwindTablesMandatory;
+
+ // Prepare for frame info.
+ unsigned FrameLabelId = 0;
+
+ // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
+ // process it.
+ for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+ if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ HandleVRSaveUpdate(MBBI, TII);
+ break;
+ }
+ }
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ determineFrameLayout(MF);
+ unsigned FrameSize = MFI->getStackSize();
+
+ int NegFrameSize = -FrameSize;
+
+ // Get processor type.
+ bool IsPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool IsMachoABI = Subtarget.isMachoABI();
+ // Check if the link register (LR) must be saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF) && FrameSize;
+
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI);
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI);
+
+ if (IsPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X31)
+ .addImm(FPOffset/4)
+ .addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
+ .addReg(PPC::X0)
+ .addImm(LROffset / 4)
+ .addReg(PPC::X1);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R31)
+ .addImm(FPOffset)
+ .addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
+ .addReg(PPC::R0)
+ .addImm(LROffset)
+ .addReg(PPC::R1);
+ }
+
+ // Skip if a leaf routine.
+ if (!FrameSize) return;
+
+ // Get stack alignments.
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(FrameLabelId);
+ }
+
+ // Adjust stack pointer: r1 += NegFrameSize.
+ // If there is a preferred stack alignment, align R1 now
+ if (!IsPPC64) {
+ // PPC32.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!");
+ assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
+ .addReg(PPC::R1)
+ .addImm(0)
+ .addImm(32 - Log2_32(MaxAlign))
+ .addImm(31);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ .addReg(PPC::R1)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ } else if (isInt16(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
+ .addReg(PPC::R1)
+ .addImm(NegFrameSize)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
+ .addReg(PPC::R1)
+ .addReg(PPC::R1)
+ .addReg(PPC::R0);
+ }
+ } else { // PPC64.
+ if (ALIGN_STACK && MaxAlign > TargetAlign) {
+ assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!");
+ assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!");
+
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
+ .addReg(PPC::X1)
+ .addImm(0)
+ .addImm(64 - Log2_32(MaxAlign));
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
+ .addReg(PPC::X0)
+ .addImm(NegFrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X1)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ } else if (isInt16(NegFrameSize)) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
+ .addReg(PPC::X1)
+ .addImm(NegFrameSize / 4)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
+ .addReg(PPC::X1)
+ .addReg(PPC::X1)
+ .addReg(PPC::X0);
+ }
+ }
+
+ if (needsFrameMoves) {
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+ if (NegFrameSize) {
+ // Show update of SP.
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ } else {
+ MachineLocation SP(IsPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabelId, SP, SP));
+ }
+
+ if (HasFP) {
+ MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
+ MachineLocation FPSrc(IsPPC64 ? PPC::X31 : PPC::R31);
+ Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
+ }
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ }
+
+ MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
+ MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR);
+ Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc));
+
+ // Mark effective beginning of when frame pointer is ready.
+ unsigned ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId);
+
+ MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) :
+ (IsPPC64 ? PPC::X1 : PPC::R1));
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ }
+
+ // If there is a frame pointer, copy R1 into R31
+ if (HasFP) {
+ if (!IsPPC64) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
+ .addReg(PPC::R1)
+ .addReg(PPC::R1);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
+ .addReg(PPC::X1)
+ .addReg(PPC::X1);
+ }
+ }
+}
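When NegFrameSize does not fit in a signed 16-bit immediate, the prologue above materializes it with a LIS/ORI pair: LIS places the high half in the upper 16 bits of a scratch register and ORI ors in the low half; no carry fix-up is needed because ori, unlike addi, zero-extends its immediate. A hedged sketch of that split (splitForLisOri is an illustrative helper, not an LLVM API; it mirrors the NegFrameSize >> 16 and NegFrameSize & 0xFFFF operands used above):

#include <cassert>
#include <cstdint>

// Model what "lis rD, Hi; ori rD, rD, Lo" leaves in a 32-bit register.
static uint32_t splitForLisOri(int32_t Value, uint16_t &Hi, uint16_t &Lo) {
  Hi = static_cast<uint16_t>(Value >> 16);      // arithmetic shift, as in the BuildMI operands
  Lo = static_cast<uint16_t>(Value & 0xFFFF);
  return (static_cast<uint32_t>(Hi) << 16) | Lo;
}

int main() {
  int32_t NegFrameSize = -100000;               // too large for a 16-bit stwu/addi displacement
  uint16_t Hi, Lo;
  uint32_t InReg = splitForLisOri(NegFrameSize, Hi, Lo);
  assert(static_cast<int32_t>(InReg) == NegFrameSize && "lis/ori split must round-trip");
  return 0;
}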
+
+void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+
+ assert( (RetOpcode == PPC::BLR ||
+ RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8) &&
+ "Can only insert epilog into returning blocks");
+
+ // Get alignment info so we know how to restore r1
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Get the number of bytes allocated from the FrameInfo.
+ int FrameSize = MFI->getStackSize();
+
+ // Get processor type.
+ bool IsPPC64 = Subtarget.isPPC64();
+ // Get operating system
+ bool IsMachoABI = Subtarget.isMachoABI();
+ // Check if the link register (LR) has been saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ // Do we have a frame pointer for this function?
+ bool HasFP = hasFP(MF) && FrameSize;
+
+ int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI);
+ int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI);
+
+ bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8;
+
+ if (UsesTCRet) {
+ int MaxTCRetDelta = FI->getTailCallSPDelta();
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int Delta = StackAdj - MaxTCRetDelta;
+ assert((Delta >= 0) && "Delta must be non-negative");
+ if (MaxTCRetDelta>0)
+ FrameSize += (StackAdj +Delta);
+ else
+ FrameSize += StackAdj;
+ }
+
+ if (FrameSize) {
+ // The loaded (or persistent) stack pointer value is offset by the 'stwu'
+ // on entry to the function. Add this offset back now.
+ if (!IsPPC64) {
+ // If this function contained a fastcc call and PerformTailCallOpt is
+ // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+ // call which invalidates the stack pointer value in SP(0). So we use the
+ // value of R31 in this case.
+ if (FI->hasFastCall() && isInt16(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R31).addImm(FrameSize);
+ } else if (FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
+ .addReg(PPC::R1)
+ .addReg(PPC::R31)
+ .addReg(PPC::R0);
+ } else if (isInt16(FrameSize) &&
+ (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1)
+ .addImm(0).addReg(PPC::R1);
+ }
+ } else {
+ if (FI->hasFastCall() && isInt16(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X31).addImm(FrameSize);
+ } else if (FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
+ .addReg(PPC::X1)
+ .addReg(PPC::X31)
+ .addReg(PPC::X0);
+ } else if (isInt16(FrameSize) && TargetAlign >= MaxAlign &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
+ .addImm(0).addReg(PPC::X1);
+ }
+ }
+ }
+
+ if (IsPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
+ .addImm(LROffset/4).addReg(PPC::X1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
+ .addImm(FPOffset/4).addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
+ .addImm(LROffset).addReg(PPC::R1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
+ .addImm(FPOffset).addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
+ }
+
+ // Callee pop calling convention. Pop parameter/linkage area. Used for tail
+ // call optimization
+ if (PerformTailCallOpt && RetOpcode == PPC::BLR &&
+ MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+ unsigned StackReg = IsPPC64 ? PPC::X1 : PPC::R1;
+ unsigned FPReg = IsPPC64 ? PPC::X31 : PPC::R31;
+ unsigned TmpReg = IsPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = IsPPC64 ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = IsPPC64 ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = IsPPC64 ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = IsPPC64 ? PPC::ORI8 : PPC::ORI;
+
+ if (CallerAllocatedAmt && isInt16(CallerAllocatedAmt)) {
+ BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg).addImm(CallerAllocatedAmt);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CallerAllocatedAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CallerAllocatedAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
+ .addReg(StackReg)
+ .addReg(FPReg)
+ .addReg(TmpReg);
+ }
+ } else if (RetOpcode == PPC::TCRETURNdi) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri) {
+ MBBI = prior(MBB.end());
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+ } else if (RetOpcode == PPC::TCRETURNai) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+ } else if (RetOpcode == PPC::TCRETURNdi8) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri8) {
+ MBBI = prior(MBB.end());
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+ } else if (RetOpcode == PPC::TCRETURNai8) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
+}
+
+unsigned PPCRegisterInfo::getRARegister() const {
+ return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8;
+}
+
+unsigned PPCRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ if (!Subtarget.isPPC64())
+ return hasFP(MF) ? PPC::R31 : PPC::R1;
+ else
+ return hasFP(MF) ? PPC::X31 : PPC::X1;
+}
+
+void PPCRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(PPC::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+unsigned PPCRegisterInfo::getEHExceptionRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R3 : PPC::X3;
+}
+
+unsigned PPCRegisterInfo::getEHHandlerRegister() const {
+ return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
+}
+
+int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ // FIXME: Most probably the DWARF numbers differ for Linux and Darwin
+ return PPCGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+#include "PPCGenRegisterInfo.inc"
+
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
new file mode 100644
index 000000000000..9506b651c5b3
--- /dev/null
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -0,0 +1,95 @@
+//===- PPCRegisterInfo.h - PowerPC Register Information Impl -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_REGISTERINFO_H
+#define POWERPC32_REGISTERINFO_H
+
+#include "PPC.h"
+#include "PPCGenRegisterInfo.h.inc"
+#include <map>
+
+namespace llvm {
+class PPCSubtarget;
+class TargetInstrInfo;
+class Type;
+
+class PPCRegisterInfo : public PPCGenRegisterInfo {
+ std::map<unsigned, unsigned> ImmToIdxMap;
+ const PPCSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+public:
+ PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
+
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// PPC::F14, return the number that it corresponds to (e.g. 14).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *getPointerRegClass() const;
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const { return true; }
+
+ /// requiresRegisterScavenging - We require a register scavenger.
+ /// FIXME (64-bit): Should be inlined.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void lowerDynamicAlloc(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const;
+ void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ /// determineFrameLayout - Determine the size of the frame and maximum call
+ /// frame size.
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
new file mode 100644
index 000000000000..9e15a55781c8
--- /dev/null
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -0,0 +1,360 @@
+//===- PPCRegisterInfo.td - The PowerPC Register File ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class PPCReg<string n> : Register<n> {
+ let Namespace = "PPC";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 32-bit general-purpose registers
+class GPR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// GP8 - One of the 32 64-bit general-purpose registers
+class GP8<GPR SubReg, string n> : PPCReg<n> {
+ field bits<5> Num = SubReg.Num;
+ let SubRegs = [SubReg];
+}
+
+// SPR - One of the 32-bit special-purpose registers
+class SPR<bits<10> num, string n> : PPCReg<n> {
+ field bits<10> Num = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+// CR - One of the 8 4-bit condition registers
+class CR<bits<3> num, string n> : PPCReg<n> {
+ field bits<3> Num = num;
+}
+
+// CRBIT - One of the 32 1-bit condition register fields
+class CRBIT<bits<5> num, string n> : PPCReg<n> {
+ field bits<5> Num = num;
+}
+
+
+// General-purpose registers
+def R0 : GPR< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : GPR< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : GPR< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : GPR< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : GPR< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : GPR< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : GPR< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : GPR< 7, "r7">, DwarfRegNum<[7]>;
+def R8 : GPR< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : GPR< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : GPR<10, "r10">, DwarfRegNum<[10]>;
+def R11 : GPR<11, "r11">, DwarfRegNum<[11]>;
+def R12 : GPR<12, "r12">, DwarfRegNum<[12]>;
+def R13 : GPR<13, "r13">, DwarfRegNum<[13]>;
+def R14 : GPR<14, "r14">, DwarfRegNum<[14]>;
+def R15 : GPR<15, "r15">, DwarfRegNum<[15]>;
+def R16 : GPR<16, "r16">, DwarfRegNum<[16]>;
+def R17 : GPR<17, "r17">, DwarfRegNum<[17]>;
+def R18 : GPR<18, "r18">, DwarfRegNum<[18]>;
+def R19 : GPR<19, "r19">, DwarfRegNum<[19]>;
+def R20 : GPR<20, "r20">, DwarfRegNum<[20]>;
+def R21 : GPR<21, "r21">, DwarfRegNum<[21]>;
+def R22 : GPR<22, "r22">, DwarfRegNum<[22]>;
+def R23 : GPR<23, "r23">, DwarfRegNum<[23]>;
+def R24 : GPR<24, "r24">, DwarfRegNum<[24]>;
+def R25 : GPR<25, "r25">, DwarfRegNum<[25]>;
+def R26 : GPR<26, "r26">, DwarfRegNum<[26]>;
+def R27 : GPR<27, "r27">, DwarfRegNum<[27]>;
+def R28 : GPR<28, "r28">, DwarfRegNum<[28]>;
+def R29 : GPR<29, "r29">, DwarfRegNum<[29]>;
+def R30 : GPR<30, "r30">, DwarfRegNum<[30]>;
+def R31 : GPR<31, "r31">, DwarfRegNum<[31]>;
+
+// 64-bit General-purpose registers
+def X0 : GP8< R0, "r0">, DwarfRegNum<[0]>;
+def X1 : GP8< R1, "r1">, DwarfRegNum<[1]>;
+def X2 : GP8< R2, "r2">, DwarfRegNum<[2]>;
+def X3 : GP8< R3, "r3">, DwarfRegNum<[3]>;
+def X4 : GP8< R4, "r4">, DwarfRegNum<[4]>;
+def X5 : GP8< R5, "r5">, DwarfRegNum<[5]>;
+def X6 : GP8< R6, "r6">, DwarfRegNum<[6]>;
+def X7 : GP8< R7, "r7">, DwarfRegNum<[7]>;
+def X8 : GP8< R8, "r8">, DwarfRegNum<[8]>;
+def X9 : GP8< R9, "r9">, DwarfRegNum<[9]>;
+def X10 : GP8<R10, "r10">, DwarfRegNum<[10]>;
+def X11 : GP8<R11, "r11">, DwarfRegNum<[11]>;
+def X12 : GP8<R12, "r12">, DwarfRegNum<[12]>;
+def X13 : GP8<R13, "r13">, DwarfRegNum<[13]>;
+def X14 : GP8<R14, "r14">, DwarfRegNum<[14]>;
+def X15 : GP8<R15, "r15">, DwarfRegNum<[15]>;
+def X16 : GP8<R16, "r16">, DwarfRegNum<[16]>;
+def X17 : GP8<R17, "r17">, DwarfRegNum<[17]>;
+def X18 : GP8<R18, "r18">, DwarfRegNum<[18]>;
+def X19 : GP8<R19, "r19">, DwarfRegNum<[19]>;
+def X20 : GP8<R20, "r20">, DwarfRegNum<[20]>;
+def X21 : GP8<R21, "r21">, DwarfRegNum<[21]>;
+def X22 : GP8<R22, "r22">, DwarfRegNum<[22]>;
+def X23 : GP8<R23, "r23">, DwarfRegNum<[23]>;
+def X24 : GP8<R24, "r24">, DwarfRegNum<[24]>;
+def X25 : GP8<R25, "r25">, DwarfRegNum<[25]>;
+def X26 : GP8<R26, "r26">, DwarfRegNum<[26]>;
+def X27 : GP8<R27, "r27">, DwarfRegNum<[27]>;
+def X28 : GP8<R28, "r28">, DwarfRegNum<[28]>;
+def X29 : GP8<R29, "r29">, DwarfRegNum<[29]>;
+def X30 : GP8<R30, "r30">, DwarfRegNum<[30]>;
+def X31 : GP8<R31, "r31">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0 : FPR< 0, "f0">, DwarfRegNum<[32]>;
+def F1 : FPR< 1, "f1">, DwarfRegNum<[33]>;
+def F2 : FPR< 2, "f2">, DwarfRegNum<[34]>;
+def F3 : FPR< 3, "f3">, DwarfRegNum<[35]>;
+def F4 : FPR< 4, "f4">, DwarfRegNum<[36]>;
+def F5 : FPR< 5, "f5">, DwarfRegNum<[37]>;
+def F6 : FPR< 6, "f6">, DwarfRegNum<[38]>;
+def F7 : FPR< 7, "f7">, DwarfRegNum<[39]>;
+def F8 : FPR< 8, "f8">, DwarfRegNum<[40]>;
+def F9 : FPR< 9, "f9">, DwarfRegNum<[41]>;
+def F10 : FPR<10, "f10">, DwarfRegNum<[42]>;
+def F11 : FPR<11, "f11">, DwarfRegNum<[43]>;
+def F12 : FPR<12, "f12">, DwarfRegNum<[44]>;
+def F13 : FPR<13, "f13">, DwarfRegNum<[45]>;
+def F14 : FPR<14, "f14">, DwarfRegNum<[46]>;
+def F15 : FPR<15, "f15">, DwarfRegNum<[47]>;
+def F16 : FPR<16, "f16">, DwarfRegNum<[48]>;
+def F17 : FPR<17, "f17">, DwarfRegNum<[49]>;
+def F18 : FPR<18, "f18">, DwarfRegNum<[50]>;
+def F19 : FPR<19, "f19">, DwarfRegNum<[51]>;
+def F20 : FPR<20, "f20">, DwarfRegNum<[52]>;
+def F21 : FPR<21, "f21">, DwarfRegNum<[53]>;
+def F22 : FPR<22, "f22">, DwarfRegNum<[54]>;
+def F23 : FPR<23, "f23">, DwarfRegNum<[55]>;
+def F24 : FPR<24, "f24">, DwarfRegNum<[56]>;
+def F25 : FPR<25, "f25">, DwarfRegNum<[57]>;
+def F26 : FPR<26, "f26">, DwarfRegNum<[58]>;
+def F27 : FPR<27, "f27">, DwarfRegNum<[59]>;
+def F28 : FPR<28, "f28">, DwarfRegNum<[60]>;
+def F29 : FPR<29, "f29">, DwarfRegNum<[61]>;
+def F30 : FPR<30, "f30">, DwarfRegNum<[62]>;
+def F31 : FPR<31, "f31">, DwarfRegNum<[63]>;
+
+// Vector registers
+def V0 : VR< 0, "v0">, DwarfRegNum<[77]>;
+def V1 : VR< 1, "v1">, DwarfRegNum<[78]>;
+def V2 : VR< 2, "v2">, DwarfRegNum<[79]>;
+def V3 : VR< 3, "v3">, DwarfRegNum<[80]>;
+def V4 : VR< 4, "v4">, DwarfRegNum<[81]>;
+def V5 : VR< 5, "v5">, DwarfRegNum<[82]>;
+def V6 : VR< 6, "v6">, DwarfRegNum<[83]>;
+def V7 : VR< 7, "v7">, DwarfRegNum<[84]>;
+def V8 : VR< 8, "v8">, DwarfRegNum<[85]>;
+def V9 : VR< 9, "v9">, DwarfRegNum<[86]>;
+def V10 : VR<10, "v10">, DwarfRegNum<[87]>;
+def V11 : VR<11, "v11">, DwarfRegNum<[88]>;
+def V12 : VR<12, "v12">, DwarfRegNum<[89]>;
+def V13 : VR<13, "v13">, DwarfRegNum<[90]>;
+def V14 : VR<14, "v14">, DwarfRegNum<[91]>;
+def V15 : VR<15, "v15">, DwarfRegNum<[92]>;
+def V16 : VR<16, "v16">, DwarfRegNum<[93]>;
+def V17 : VR<17, "v17">, DwarfRegNum<[94]>;
+def V18 : VR<18, "v18">, DwarfRegNum<[95]>;
+def V19 : VR<19, "v19">, DwarfRegNum<[96]>;
+def V20 : VR<20, "v20">, DwarfRegNum<[97]>;
+def V21 : VR<21, "v21">, DwarfRegNum<[98]>;
+def V22 : VR<22, "v22">, DwarfRegNum<[99]>;
+def V23 : VR<23, "v23">, DwarfRegNum<[100]>;
+def V24 : VR<24, "v24">, DwarfRegNum<[101]>;
+def V25 : VR<25, "v25">, DwarfRegNum<[102]>;
+def V26 : VR<26, "v26">, DwarfRegNum<[103]>;
+def V27 : VR<27, "v27">, DwarfRegNum<[104]>;
+def V28 : VR<28, "v28">, DwarfRegNum<[105]>;
+def V29 : VR<29, "v29">, DwarfRegNum<[106]>;
+def V30 : VR<30, "v30">, DwarfRegNum<[107]>;
+def V31 : VR<31, "v31">, DwarfRegNum<[108]>;
+
+// Condition registers
+def CR0 : CR<0, "cr0">, DwarfRegNum<[68]>;
+def CR1 : CR<1, "cr1">, DwarfRegNum<[69]>;
+def CR2 : CR<2, "cr2">, DwarfRegNum<[70]>;
+def CR3 : CR<3, "cr3">, DwarfRegNum<[71]>;
+def CR4 : CR<4, "cr4">, DwarfRegNum<[72]>;
+def CR5 : CR<5, "cr5">, DwarfRegNum<[73]>;
+def CR6 : CR<6, "cr6">, DwarfRegNum<[74]>;
+def CR7 : CR<7, "cr7">, DwarfRegNum<[75]>;
+
+// Condition register bits
+def CR0LT : CRBIT< 0, "0">, DwarfRegNum<[0]>;
+def CR0GT : CRBIT< 1, "1">, DwarfRegNum<[0]>;
+def CR0EQ : CRBIT< 2, "2">, DwarfRegNum<[0]>;
+def CR0UN : CRBIT< 3, "3">, DwarfRegNum<[0]>;
+def CR1LT : CRBIT< 4, "4">, DwarfRegNum<[0]>;
+def CR1GT : CRBIT< 5, "5">, DwarfRegNum<[0]>;
+def CR1EQ : CRBIT< 6, "6">, DwarfRegNum<[0]>;
+def CR1UN : CRBIT< 7, "7">, DwarfRegNum<[0]>;
+def CR2LT : CRBIT< 8, "8">, DwarfRegNum<[0]>;
+def CR2GT : CRBIT< 9, "9">, DwarfRegNum<[0]>;
+def CR2EQ : CRBIT<10, "10">, DwarfRegNum<[0]>;
+def CR2UN : CRBIT<11, "11">, DwarfRegNum<[0]>;
+def CR3LT : CRBIT<12, "12">, DwarfRegNum<[0]>;
+def CR3GT : CRBIT<13, "13">, DwarfRegNum<[0]>;
+def CR3EQ : CRBIT<14, "14">, DwarfRegNum<[0]>;
+def CR3UN : CRBIT<15, "15">, DwarfRegNum<[0]>;
+def CR4LT : CRBIT<16, "16">, DwarfRegNum<[0]>;
+def CR4GT : CRBIT<17, "17">, DwarfRegNum<[0]>;
+def CR4EQ : CRBIT<18, "18">, DwarfRegNum<[0]>;
+def CR4UN : CRBIT<19, "19">, DwarfRegNum<[0]>;
+def CR5LT : CRBIT<20, "20">, DwarfRegNum<[0]>;
+def CR5GT : CRBIT<21, "21">, DwarfRegNum<[0]>;
+def CR5EQ : CRBIT<22, "22">, DwarfRegNum<[0]>;
+def CR5UN : CRBIT<23, "23">, DwarfRegNum<[0]>;
+def CR6LT : CRBIT<24, "24">, DwarfRegNum<[0]>;
+def CR6GT : CRBIT<25, "25">, DwarfRegNum<[0]>;
+def CR6EQ : CRBIT<26, "26">, DwarfRegNum<[0]>;
+def CR6UN : CRBIT<27, "27">, DwarfRegNum<[0]>;
+def CR7LT : CRBIT<28, "28">, DwarfRegNum<[0]>;
+def CR7GT : CRBIT<29, "29">, DwarfRegNum<[0]>;
+def CR7EQ : CRBIT<30, "30">, DwarfRegNum<[0]>;
+def CR7UN : CRBIT<31, "31">, DwarfRegNum<[0]>;
+
+def : SubRegSet<1, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
+ [CR0LT, CR1LT, CR2LT, CR3LT, CR4LT, CR5LT, CR6LT, CR7LT]>;
+def : SubRegSet<2, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
+ [CR0GT, CR1GT, CR2GT, CR3GT, CR4GT, CR5GT, CR6GT, CR7GT]>;
+def : SubRegSet<3, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
+ [CR0EQ, CR1EQ, CR2EQ, CR3EQ, CR4EQ, CR5EQ, CR6EQ, CR7EQ]>;
+def : SubRegSet<4, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
+ [CR0UN, CR1UN, CR2UN, CR3UN, CR4UN, CR5UN, CR6UN, CR7UN]>;
+
+// Link register
+def LR : SPR<8, "lr">, DwarfRegNum<[65]>;
+//let Aliases = [LR] in
+def LR8 : SPR<8, "lr">, DwarfRegNum<[65]>;
+
+// Count register
+def CTR : SPR<9, "ctr">, DwarfRegNum<[66]>;
+def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66]>;
+
+// VRsave register
+def VRSAVE: SPR<256, "VRsave">, DwarfRegNum<[107]>;
+
+// FP rounding mode: bits 30 and 31 of the FP status and control register
+// This is not allocated as a normal register; it appears only in
+// Uses and Defs. The ABI says it needs to be preserved by a function,
+// but this is not achieved by saving and restoring it as with most
+// registers; it has to be done in code. To make this work, all the
+// return and call instructions are described as Uses of RM, so instructions
+// that do nothing but change RM will not get deleted.
+// Also, in the architecture it is not really an SPR; 512 is arbitrary.
+def RM: SPR<512, "**ROUNDING MODE**">, DwarfRegNum<[0]>;
+
+/// Register classes
+// Allocate volatiles first,
+// then nonvolatiles in reverse order, since stmw/lmw save from rN to r31.
+def GPRC : RegisterClass<"PPC", [i32], 32,
+ [R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12,
+ R30, R29, R28, R27, R26, R25, R24, R23, R22, R21, R20, R19, R18, R17,
+ R16, R15, R14, R13, R31, R0, R1, LR]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GPRCClass::iterator
+ GPRCClass::allocation_order_begin(const MachineFunction &MF) const {
+ // In Linux, r2 is reserved for the OS.
+ if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin())
+ return begin()+1;
+
+ return begin();
+ }
+ GPRCClass::iterator
+ GPRCClass::allocation_order_end(const MachineFunction &MF) const {
+ // On PPC64, r13 is the thread pointer. Never allocate this register.
+ // Note that this is overconservative, as it also prevents allocation of
+ // R31 when the FP is not needed.
+ if (MF.getTarget().getSubtarget<PPCSubtarget>().isPPC64())
+ return end()-5; // don't allocate R13, R31, R0, R1, LR
+
+ if (needsFP(MF))
+ return end()-4; // don't allocate R31, R0, R1, LR
+ else
+ return end()-3; // don't allocate R0, R1, LR
+ }
+ }];
+}
+def G8RC : RegisterClass<"PPC", [i64], 64,
+ [X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12,
+ X30, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20, X19, X18, X17,
+ X16, X15, X14, X31, X13, X0, X1, LR8]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ G8RCClass::iterator
+ G8RCClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ G8RCClass::iterator
+ G8RCClass::allocation_order_end(const MachineFunction &MF) const {
+ if (needsFP(MF))
+ return end()-5;
+ else
+ return end()-4;
+ }
+ }];
+}
+
+
+
+def F8RC : RegisterClass<"PPC", [f64], 64, [F0, F1, F2, F3, F4, F5, F6, F7,
+ F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21,
+ F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+def F4RC : RegisterClass<"PPC", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7,
+ F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21,
+ F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
+ [V2, V3, V4, V5, V0, V1,
+ V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V20, V21,
+ V22, V23, V24, V25, V26, V27, V28, V29, V30, V31]>;
+
+def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2,
+ CR3, CR4]>;
+
+def CRBITRC : RegisterClass<"PPC", [i32], 32,
+ [CR0LT, CR0GT, CR0EQ, CR0UN,
+ CR1LT, CR1GT, CR1EQ, CR1UN,
+ CR2LT, CR2GT, CR2EQ, CR2UN,
+ CR3LT, CR3GT, CR3EQ, CR3UN,
+ CR4LT, CR4GT, CR4EQ, CR4UN,
+ CR5LT, CR5GT, CR5EQ, CR5UN,
+ CR6LT, CR6GT, CR6EQ, CR6UN,
+ CR7LT, CR7GT, CR7EQ, CR7UN
+ ]>
+{
+ let CopyCost = -1;
+}
+
+
+def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>;
+def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>;
diff --git a/lib/Target/PowerPC/PPCRelocations.h b/lib/Target/PowerPC/PPCRelocations.h
new file mode 100644
index 000000000000..a33e7e03370c
--- /dev/null
+++ b/lib/Target/PowerPC/PPCRelocations.h
@@ -0,0 +1,56 @@
+//===- PPCRelocations.h - PPC32 Code Relocations ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC32RELOCATIONS_H
+#define PPC32RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+// Hack to rid us of a PPC pre-processor symbol which is erroneously
+// defined in a PowerPC header file (bug in Linux/PPC)
+#ifdef PPC
+#undef PPC
+#endif
+
+namespace llvm {
+ namespace PPC {
+ enum RelocationType {
+ // reloc_vanilla - A standard relocation, where the address of the
+ // relocated object completely overwrites the address of the relocation.
+ reloc_vanilla,
+
+ // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions.
+ reloc_pcrel_bx,
+
+ // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE,
+ // and other bcx instructions.
+ reloc_pcrel_bcx,
+
+ // reloc_absolute_high - Absolute relocation, for the loadhi instruction
+ // (which is really addis). Add the high 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_high,
+
+ // reloc_absolute_low - Absolute relocation, for the la instruction (which
+ // is really an addi). Add the low 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_low,
+
+ // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store
+ // instructions, which have two implicit zero bits.
+ reloc_absolute_low_ix
+ };
+ }
+}
+
+#endif
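For the addis/addi (la) pair that reloc_absolute_high and reloc_absolute_low describe, the conventional PowerPC split is a high/low 16-bit pair with a carry adjustment: because addi sign-extends its immediate, the high half is bumped by one whenever bit 15 of the address is set. A hedged sketch of that convention (hi16Adjusted and lo16 are illustrative helpers; the real fix-ups are applied by the JIT relocation resolver, which lives outside this header):

#include <cassert>
#include <cstdint>

// Conventional @ha / @l split for an addis + addi pair.
static uint16_t hi16Adjusted(uint32_t Addr) {
  return static_cast<uint16_t>((Addr >> 16) + ((Addr & 0x8000) ? 1 : 0));
}
static int16_t lo16(uint32_t Addr) {
  return static_cast<int16_t>(Addr & 0xFFFF);
}

int main() {
  uint32_t Addr = 0x1234ABCDu;   // arbitrary example address with bit 15 set
  // Reassemble the way the instruction pair does: (hi << 16) + sign-extended lo.
  int64_t Rebuilt = (static_cast<int64_t>(hi16Adjusted(Addr)) << 16) +
                    static_cast<int64_t>(lo16(Addr));
  assert(static_cast<uint32_t>(Rebuilt) == Addr);
  return 0;
}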
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
new file mode 100644
index 000000000000..d589414c0154
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -0,0 +1,508 @@
+//===- PPCSchedule.td - PowerPC Scheduling Definitions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across PowerPC chip sets
+//
+def BPU : FuncUnit; // Branch unit
+def SLU : FuncUnit; // Store/load unit
+def SRU : FuncUnit; // Special register unit
+def IU1 : FuncUnit; // Integer unit 1 (simple)
+def IU2 : FuncUnit; // Integer unit 2 (complex)
+def IU3 : FuncUnit; // Integer unit 3 (7450 simple)
+def IU4 : FuncUnit; // Integer unit 4 (7450 simple)
+def FPU1 : FuncUnit; // Floating point unit 1
+def FPU2 : FuncUnit; // Floating point unit 2
+def VPU : FuncUnit; // Vector permutation unit
+def VIU1 : FuncUnit; // Vector integer unit 1 (simple)
+def VIU2 : FuncUnit; // Vector integer unit 2 (complex)
+def VFPU : FuncUnit; // Vector floating point unit
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for PowerPC
+//
+def IntGeneral : InstrItinClass;
+def IntCompare : InstrItinClass;
+def IntDivD : InstrItinClass;
+def IntDivW : InstrItinClass;
+def IntMFFS : InstrItinClass;
+def IntMFVSCR : InstrItinClass;
+def IntMTFSB0 : InstrItinClass;
+def IntMTSRD : InstrItinClass;
+def IntMulHD : InstrItinClass;
+def IntMulHW : InstrItinClass;
+def IntMulHWU : InstrItinClass;
+def IntMulLI : InstrItinClass;
+def IntRFID : InstrItinClass;
+def IntRotateD : InstrItinClass;
+def IntRotate : InstrItinClass;
+def IntShift : InstrItinClass;
+def IntTrapD : InstrItinClass;
+def IntTrapW : InstrItinClass;
+def BrB : InstrItinClass;
+def BrCR : InstrItinClass;
+def BrMCR : InstrItinClass;
+def BrMCRX : InstrItinClass;
+def LdStDCBA : InstrItinClass;
+def LdStDCBF : InstrItinClass;
+def LdStDCBI : InstrItinClass;
+def LdStGeneral : InstrItinClass;
+def LdStDSS : InstrItinClass;
+def LdStICBI : InstrItinClass;
+def LdStUX : InstrItinClass;
+def LdStLD : InstrItinClass;
+def LdStLDARX : InstrItinClass;
+def LdStLFD : InstrItinClass;
+def LdStLFDU : InstrItinClass;
+def LdStLHA : InstrItinClass;
+def LdStLMW : InstrItinClass;
+def LdStLVecX : InstrItinClass;
+def LdStLWA : InstrItinClass;
+def LdStLWARX : InstrItinClass;
+def LdStSLBIA : InstrItinClass;
+def LdStSLBIE : InstrItinClass;
+def LdStSTD : InstrItinClass;
+def LdStSTDCX : InstrItinClass;
+def LdStSTVEBX : InstrItinClass;
+def LdStSTWCX : InstrItinClass;
+def LdStSync : InstrItinClass;
+def SprISYNC : InstrItinClass;
+def SprMFSR : InstrItinClass;
+def SprMTMSR : InstrItinClass;
+def SprMTSR : InstrItinClass;
+def SprTLBSYNC : InstrItinClass;
+def SprMFCR : InstrItinClass;
+def SprMFMSR : InstrItinClass;
+def SprMFSPR : InstrItinClass;
+def SprMFTB : InstrItinClass;
+def SprMTSPR : InstrItinClass;
+def SprMTSRIN : InstrItinClass;
+def SprRFI : InstrItinClass;
+def SprSC : InstrItinClass;
+def FPGeneral : InstrItinClass;
+def FPCompare : InstrItinClass;
+def FPDivD : InstrItinClass;
+def FPDivS : InstrItinClass;
+def FPFused : InstrItinClass;
+def FPRes : InstrItinClass;
+def FPSqrt : InstrItinClass;
+def VecGeneral : InstrItinClass;
+def VecFP : InstrItinClass;
+def VecFPCompare : InstrItinClass;
+def VecComplex : InstrItinClass;
+def VecPerm : InstrItinClass;
+def VecFPRound : InstrItinClass;
+def VecVSL : InstrItinClass;
+def VecVSR : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+include "PPCScheduleG3.td"
+include "PPCScheduleG4.td"
+include "PPCScheduleG4Plus.td"
+include "PPCScheduleG5.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction to itinerary class map - When adding new opcodes to the
+// supported set, refer to the following table to determine which itinerary
+// class the opcode belongs to.
+//
+// opcode itinerary class
+// ====== ===============
+// add IntGeneral
+// addc IntGeneral
+// adde IntGeneral
+// addi IntGeneral
+// addic IntGeneral
+// addic. IntGeneral
+// addis IntGeneral
+// addme IntGeneral
+// addze IntGeneral
+// and IntGeneral
+// andc IntGeneral
+// andi. IntGeneral
+// andis. IntGeneral
+// b BrB
+// bc BrB
+// bcctr BrB
+// bclr BrB
+// cmp IntCompare
+// cmpi IntCompare
+// cmpl IntCompare
+// cmpli IntCompare
+// cntlzd IntRotateD
+// cntlzw IntGeneral
+// crand BrCR
+// crandc BrCR
+// creqv BrCR
+// crnand BrCR
+// crnor BrCR
+// cror BrCR
+// crorc BrCR
+// crxor BrCR
+// dcba LdStDCBA
+// dcbf LdStDCBF
+// dcbi LdStDCBI
+// dcbst LdStDCBF
+// dcbt LdStGeneral
+// dcbtst LdStGeneral
+// dcbz LdStDCBF
+// divd IntDivD
+// divdu IntDivD
+// divw IntDivW
+// divwu IntDivW
+// dss LdStDSS
+// dst LdStDSS
+// dstst LdStDSS
+// eciwx LdStGeneral
+// ecowx LdStGeneral
+// eieio LdStGeneral
+// eqv IntGeneral
+// extsb IntGeneral
+// extsh IntGeneral
+// extsw IntRotateD
+// fabs FPGeneral
+// fadd FPGeneral
+// fadds FPGeneral
+// fcfid FPGeneral
+// fcmpo FPCompare
+// fcmpu FPCompare
+// fctid FPGeneral
+// fctidz FPGeneral
+// fctiw FPGeneral
+// fctiwz FPGeneral
+// fdiv FPDivD
+// fdivs FPDivS
+// fmadd FPFused
+// fmadds FPGeneral
+// fmr FPGeneral
+// fmsub FPFused
+// fmsubs FPGeneral
+// fmul FPFused
+// fmuls FPGeneral
+// fnabs FPGeneral
+// fneg FPGeneral
+// fnmadd FPFused
+// fnmadds FPGeneral
+// fnmsub FPFused
+// fnmsubs FPGeneral
+// fres FPRes
+// frsp FPGeneral
+// frsqrte FPGeneral
+// fsel FPGeneral
+// fsqrt FPSqrt
+// fsqrts FPSqrt
+// fsub FPGeneral
+// fsubs FPGeneral
+// icbi LdStICBI
+// isync SprISYNC
+// lbz LdStGeneral
+// lbzu LdStGeneral
+// lbzux LdStUX
+// lbzx LdStGeneral
+// ld LdStLD
+// ldarx LdStLDARX
+// ldu LdStLD
+// ldux LdStLD
+// ldx LdStLD
+// lfd LdStLFD
+// lfdu LdStLFDU
+// lfdux LdStLFDU
+// lfdx LdStLFDU
+// lfs LdStLFDU
+// lfsu LdStLFDU
+// lfsux LdStLFDU
+// lfsx LdStLFDU
+// lha LdStLHA
+// lhau LdStLHA
+// lhaux LdStLHA
+// lhax LdStLHA
+// lhbrx LdStGeneral
+// lhz LdStGeneral
+// lhzu LdStGeneral
+// lhzux LdStUX
+// lhzx LdStGeneral
+// lmw LdStLMW
+// lswi LdStLMW
+// lswx LdStLMW
+// lvebx LdStLVecX
+// lvehx LdStLVecX
+// lvewx LdStLVecX
+// lvsl LdStLVecX
+// lvsr LdStLVecX
+// lvx LdStLVecX
+// lvxl LdStLVecX
+// lwa LdStLWA
+// lwarx LdStLWARX
+// lwaux LdStLHA
+// lwax LdStLHA
+// lwbrx LdStGeneral
+// lwz LdStGeneral
+// lwzu LdStGeneral
+// lwzux LdStUX
+// lwzx LdStGeneral
+// mcrf BrMCR
+// mcrfs FPGeneral
+// mcrxr BrMCRX
+// mfcr SprMFCR
+// mffs IntMFFS
+// mfmsr SprMFMSR
+// mfspr SprMFSPR
+// mfsr SprMFSR
+// mfsrin SprMFSR
+// mftb SprMFTB
+// mfvscr IntMFVSCR
+// mtcrf BrMCRX
+// mtfsb0 IntMTFSB0
+// mtfsb1 IntMTFSB0
+// mtfsf IntMTFSB0
+// mtfsfi IntMTFSB0
+// mtmsr SprMTMSR
+// mtmsrd LdStLD
+// mtspr SprMTSPR
+// mtsr SprMTSR
+// mtsrd IntMTSRD
+// mtsrdin IntMTSRD
+// mtsrin SprMTSRIN
+// mtvscr IntMFVSCR
+// mulhd IntMulHD
+// mulhdu IntMulHD
+// mulhw IntMulHW
+// mulhwu IntMulHWU
+// mulld IntMulHD
+// mulli IntMulLI
+// mullw IntMulHW
+// nand IntGeneral
+// neg IntGeneral
+// nor IntGeneral
+// or IntGeneral
+// orc IntGeneral
+// ori IntGeneral
+// oris IntGeneral
+// rfi SprRFI
+// rfid IntRFID
+// rldcl IntRotateD
+// rldcr IntRotateD
+// rldic IntRotateD
+// rldicl IntRotateD
+// rldicr IntRotateD
+// rldimi IntRotateD
+// rlwimi IntRotate
+// rlwinm IntGeneral
+// rlwnm IntGeneral
+// sc SprSC
+// slbia LdStSLBIA
+// slbie LdStSLBIE
+// sld IntRotateD
+// slw IntGeneral
+// srad IntRotateD
+// sradi IntRotateD
+// sraw IntShift
+// srawi IntShift
+// srd IntRotateD
+// srw IntGeneral
+// stb LdStGeneral
+// stbu LdStGeneral
+// stbux LdStGeneral
+// stbx LdStGeneral
+// std LdStSTD
+// stdcx. LdStSTDCX
+// stdu LdStSTD
+// stdux LdStSTD
+// stdx LdStSTD
+// stfd LdStUX
+// stfdu LdStUX
+// stfdux LdStUX
+// stfdx LdStUX
+// stfiwx LdStUX
+// stfs LdStUX
+// stfsu LdStUX
+// stfsux LdStUX
+// stfsx LdStUX
+// sth LdStGeneral
+// sthbrx LdStGeneral
+// sthu LdStGeneral
+// sthux LdStGeneral
+// sthx LdStGeneral
+// stmw LdStLMW
+// stswi LdStLMW
+// stswx LdStLMW
+// stvebx LdStSTVEBX
+// stvehx LdStSTVEBX
+// stvewx LdStSTVEBX
+// stvx LdStSTVEBX
+// stvxl LdStSTVEBX
+// stw LdStGeneral
+// stwbrx LdStGeneral
+// stwcx. LdStSTWCX
+// stwu LdStGeneral
+// stwux LdStGeneral
+// stwx LdStGeneral
+// subf IntGeneral
+// subfc IntGeneral
+// subfe IntGeneral
+// subfic IntGeneral
+// subfme IntGeneral
+// subfze IntGeneral
+// sync LdStSync
+// td IntTrapD
+// tdi IntTrapD
+// tlbia LdStSLBIA
+// tlbie LdStDCBF
+// tlbsync SprTLBSYNC
+// tw IntTrapW
+// twi IntTrapW
+// vaddcuw VecGeneral
+// vaddfp VecFP
+// vaddsbs VecGeneral
+// vaddshs VecGeneral
+// vaddsws VecGeneral
+// vaddubm VecGeneral
+// vaddubs VecGeneral
+// vadduhm VecGeneral
+// vadduhs VecGeneral
+// vadduwm VecGeneral
+// vadduws VecGeneral
+// vand VecGeneral
+// vandc VecGeneral
+// vavgsb VecGeneral
+// vavgsh VecGeneral
+// vavgsw VecGeneral
+// vavgub VecGeneral
+// vavguh VecGeneral
+// vavguw VecGeneral
+// vcfsx VecFP
+// vcfux VecFP
+// vcmpbfp VecFPCompare
+// vcmpeqfp VecFPCompare
+// vcmpequb VecGeneral
+// vcmpequh VecGeneral
+// vcmpequw VecGeneral
+// vcmpgefp VecFPCompare
+// vcmpgtfp VecFPCompare
+// vcmpgtsb VecGeneral
+// vcmpgtsh VecGeneral
+// vcmpgtsw VecGeneral
+// vcmpgtub VecGeneral
+// vcmpgtuh VecGeneral
+// vcmpgtuw VecGeneral
+// vctsxs VecFP
+// vctuxs VecFP
+// vexptefp VecFP
+// vlogefp VecFP
+// vmaddfp VecFP
+// vmaxfp VecFPCompare
+// vmaxsb VecGeneral
+// vmaxsh VecGeneral
+// vmaxsw VecGeneral
+// vmaxub VecGeneral
+// vmaxuh VecGeneral
+// vmaxuw VecGeneral
+// vmhaddshs VecComplex
+// vmhraddshs VecComplex
+// vminfp VecFPCompare
+// vminsb VecGeneral
+// vminsh VecGeneral
+// vminsw VecGeneral
+// vminub VecGeneral
+// vminuh VecGeneral
+// vminuw VecGeneral
+// vmladduhm VecComplex
+// vmrghb VecPerm
+// vmrghh VecPerm
+// vmrghw VecPerm
+// vmrglb VecPerm
+// vmrglh VecPerm
+// vmrglw VecPerm
+// vmsubfp VecFP
+// vmsummbm VecComplex
+// vmsumshm VecComplex
+// vmsumshs VecComplex
+// vmsumubm VecComplex
+// vmsumuhm VecComplex
+// vmsumuhs VecComplex
+// vmulesb VecComplex
+// vmulesh VecComplex
+// vmuleub VecComplex
+// vmuleuh VecComplex
+// vmulosb VecComplex
+// vmulosh VecComplex
+// vmuloub VecComplex
+// vmulouh VecComplex
+// vnor VecGeneral
+// vor VecGeneral
+// vperm VecPerm
+// vpkpx VecPerm
+// vpkshss VecPerm
+// vpkshus VecPerm
+// vpkswss VecPerm
+// vpkswus VecPerm
+// vpkuhum VecPerm
+// vpkuhus VecPerm
+// vpkuwum VecPerm
+// vpkuwus VecPerm
+// vrefp VecFPRound
+// vrfim VecFPRound
+// vrfin VecFPRound
+// vrfip VecFPRound
+// vrfiz VecFPRound
+// vrlb VecGeneral
+// vrlh VecGeneral
+// vrlw VecGeneral
+// vrsqrtefp VecFP
+// vsel VecGeneral
+// vsl VecVSL
+// vslb VecGeneral
+// vsldoi VecPerm
+// vslh VecGeneral
+// vslo VecPerm
+// vslw VecGeneral
+// vspltb VecPerm
+// vsplth VecPerm
+// vspltisb VecPerm
+// vspltish VecPerm
+// vspltisw VecPerm
+// vspltw VecPerm
+// vsr VecVSR
+// vsrab VecGeneral
+// vsrah VecGeneral
+// vsraw VecGeneral
+// vsrb VecGeneral
+// vsrh VecGeneral
+// vsro VecPerm
+// vsrw VecGeneral
+// vsubcuw VecGeneral
+// vsubfp VecFP
+// vsubsbs VecGeneral
+// vsubshs VecGeneral
+// vsubsws VecGeneral
+// vsububm VecGeneral
+// vsububs VecGeneral
+// vsubuhm VecGeneral
+// vsubuhs VecGeneral
+// vsubuwm VecGeneral
+// vsubuws VecGeneral
+// vsum2sws VecComplex
+// vsum4sbs VecComplex
+// vsum4shs VecComplex
+// vsum4ubs VecComplex
+// vsumsws VecComplex
+// vupkhpx VecPerm
+// vupkhsb VecPerm
+// vupkhsh VecPerm
+// vupklpx VecPerm
+// vupklsb VecPerm
+// vupklsh VecPerm
+// vxor VecGeneral
+// xor IntGeneral
+// xori IntGeneral
+// xoris IntGeneral
+//
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
new file mode 100644
index 000000000000..f72194d6de0e
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -0,0 +1,63 @@
+//===- PPCScheduleG3.td - PPC G3 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G3 (750) processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def G3Itineraries : ProcessorItineraries<[
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<2, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>
+]>;
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
new file mode 100644
index 000000000000..92ed20f17ce5
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -0,0 +1,73 @@
+//===- PPCScheduleG4.td - PPC G4 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4 (7400) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4Itineraries : ProcessorItineraries<[
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<3, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<6, [IU1]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU1]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<8, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMFSR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<8, [SRU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [SRU]>]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [SRU]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>,
+ InstrItinData<SprSC , [InstrStage<2, [SRU]>]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<1, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<10, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecComplex , [InstrStage<3, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<1, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<1, [VIU1]>]>
+]>;
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
new file mode 100644
index 000000000000..7474ba494d10
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -0,0 +1,76 @@
+//===- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4+ (7450) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4PlusItineraries : ProcessorItineraries<[
+ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>,
+ InstrItinData<IntMFFS , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<2, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<IntMulHW , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<4, [IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<IntTrapW , [InstrStage<2, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
+ InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprMFSR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<4, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<5, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMTSRIN , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>,
+ InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>,
+ InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>,
+ InstrItinData<FPFused , [InstrStage<5, [FPU1]>]>,
+ InstrItinData<FPRes , [InstrStage<14, [FPU1]>]>,
+ InstrItinData<VecGeneral , [InstrStage<1, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<4, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<4, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<4, [VIU1]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VPU]>]>,
+ InstrItinData<VecVSR , [InstrStage<2, [VPU]>]>
+]>;
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
new file mode 100644
index 000000000000..d28214715a76
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -0,0 +1,83 @@
+//===- PPCScheduleG5.td - PPC G5 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G5 (970) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G5Itineraries : ProcessorItineraries<[
+ InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>,
+ InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>,
+ InstrItinData<IntDivW , [InstrStage<36, [IU1]>]>,
+ InstrItinData<IntMFFS , [InstrStage<6, [IU2]>]>,
+ InstrItinData<IntMFVSCR , [InstrStage<1, [VFPU]>]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<IntMulHD , [InstrStage<7, [IU1, IU2]>]>,
+ InstrItinData<IntMulHW , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulHWU , [InstrStage<5, [IU1, IU2]>]>,
+ InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>,
+ InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>,
+ InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>,
+ InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [IU1, IU2]>]>,
+ InstrItinData<BrB , [InstrStage<1, [BPU]>]>,
+ InstrItinData<BrCR , [InstrStage<4, [BPU]>]>,
+ InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>,
+ InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
+ InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
+ InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
+ InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>,
+ InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>,
+ InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStLWARX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>,
+ InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>,
+ InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>,
+ InstrItinData<SprISYNC , [InstrStage<40, [SLU]>]>, // needs work
+ InstrItinData<SprMFSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTMSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMTSR , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<3, [SLU]>]>,
+ InstrItinData<SprMFCR , [InstrStage<2, [IU2]>]>,
+ InstrItinData<SprMFMSR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFSPR , [InstrStage<3, [IU2]>]>,
+ InstrItinData<SprMFTB , [InstrStage<10, [IU2]>]>,
+ InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>,
+ InstrItinData<SprSC , [InstrStage<1, [IU2]>]>,
+ InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>,
+ InstrItinData<FPFused , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPRes , [InstrStage<6, [FPU1, FPU2]>]>,
+ InstrItinData<FPSqrt , [InstrStage<40, [FPU1, FPU2]>]>,
+ InstrItinData<VecGeneral , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecFP , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecFPCompare, [InstrStage<2, [VFPU]>]>,
+ InstrItinData<VecComplex , [InstrStage<5, [VIU2]>]>,
+ InstrItinData<VecPerm , [InstrStage<3, [VPU]>]>,
+ InstrItinData<VecFPRound , [InstrStage<8, [VFPU]>]>,
+ InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>,
+ InstrItinData<VecVSR , [InstrStage<3, [VPU]>]>
+]>;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
new file mode 100644
index 000000000000..425d8e6195c6
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -0,0 +1,152 @@
+//===- PowerPCSubtarget.cpp - PPC Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPC specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCSubtarget.h"
+#include "PPC.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "PPCGenSubtarget.inc"
+#include <cstdlib>
+using namespace llvm;
+
+#if defined(__APPLE__)
+#include <mach/mach.h>
+#include <mach/mach_host.h>
+#include <mach/host_info.h>
+#include <mach/machine.h>
+
+/// GetCurrentPowerPCCPU - Returns the name of the host CPU.
+static const char *GetCurrentPowerPCCPU() {
+ host_basic_info_data_t hostInfo;
+ mach_msg_type_number_t infoCount;
+
+ infoCount = HOST_BASIC_INFO_COUNT;
+ host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo,
+ &infoCount);
+
+ if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic";
+
+ switch(hostInfo.cpu_subtype) {
+ case CPU_SUBTYPE_POWERPC_601: return "601";
+ case CPU_SUBTYPE_POWERPC_602: return "602";
+ case CPU_SUBTYPE_POWERPC_603: return "603";
+ case CPU_SUBTYPE_POWERPC_603e: return "603e";
+ case CPU_SUBTYPE_POWERPC_603ev: return "603ev";
+ case CPU_SUBTYPE_POWERPC_604: return "604";
+ case CPU_SUBTYPE_POWERPC_604e: return "604e";
+ case CPU_SUBTYPE_POWERPC_620: return "620";
+ case CPU_SUBTYPE_POWERPC_750: return "750";
+ case CPU_SUBTYPE_POWERPC_7400: return "7400";
+ case CPU_SUBTYPE_POWERPC_7450: return "7450";
+ case CPU_SUBTYPE_POWERPC_970: return "970";
+ default: ;
+ }
+
+ return "generic";
+}
+#endif
+
+
+PPCSubtarget::PPCSubtarget(const TargetMachine &tm, const Module &M,
+ const std::string &FS, bool is64Bit)
+ : TM(tm)
+ , StackAlignment(16)
+ , DarwinDirective(PPC::DIR_NONE)
+ , IsGigaProcessor(false)
+ , Has64BitSupport(false)
+ , Use64BitRegs(false)
+ , IsPPC64(is64Bit)
+ , HasAltivec(false)
+ , HasFSQRT(false)
+ , HasSTFIWX(false)
+ , HasLazyResolverStubs(false)
+ , DarwinVers(0) {
+
+ // Determine default and user specified characteristics
+ std::string CPU = "generic";
+#if defined(__APPLE__)
+ CPU = GetCurrentPowerPCCPU();
+#endif
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+
+ // If we are generating code for ppc64, verify that options make sense.
+ if (is64Bit) {
+ Has64BitSupport = true;
+ // Silently force 64-bit register use on ppc64.
+ Use64BitRegs = true;
+ }
+
+ // If the user requested use of 64-bit regs, but the cpu selected doesn't
+ // support it, ignore.
+ if (use64BitRegs() && !has64BitSupport())
+ Use64BitRegs = false;
+
+ // Set the boolean corresponding to the current target triple, or the default
+ // if one cannot be determined, to true.
+ const std::string &TT = M.getTargetTriple();
+ if (TT.length() > 7) {
+ // Determine which version of darwin this is.
+ size_t DarwinPos = TT.find("-darwin");
+ if (DarwinPos != std::string::npos) {
+ if (isdigit(TT[DarwinPos+7]))
+ DarwinVers = atoi(&TT[DarwinPos+7]);
+ else
+ DarwinVers = 8; // Minimum supported darwin is Tiger.
+ }
+ } else if (TT.empty()) {
+ // Try to autosense the subtarget from the host compiler.
+#if defined(__APPLE__)
+#if __APPLE_CC__ > 5400
+ DarwinVers = 9; // GCC 5400+ is Leopard.
+#else
+ DarwinVers = 8; // Minimum supported darwin is Tiger.
+#endif
+#endif
+ }
+
+ // Set up darwin-specific properties.
+ if (isDarwin()) {
+ HasLazyResolverStubs = true;
+ AsmFlavor = NewMnemonic;
+ } else {
+ AsmFlavor = OldMnemonic;
+ }
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void PPCSubtarget::SetJITMode() {
+ // JIT mode doesn't want lazy resolver stubs, it knows exactly where
+ // everything is. This matters for PPC64, which codegens in PIC mode without
+ // stubs.
+ HasLazyResolverStubs = false;
+}
+
+
+/// hasLazyResolverStub - Return true if accesses to the specified global have
+/// to go through a dyld lazy resolution stub. This means that an extra load
+/// is required to get the address of the global.
+bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
+ // We never have stubs if HasLazyResolverStubs=false or if in static mode.
+ if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
+ return false;
+ // If symbol visibility is hidden, the extra load is not needed if
+ // the symbol is definitely defined in the current translation unit.
+ bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
+ if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
+ return false;
+ return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+ GV->hasCommonLinkage() || isDecl;
+}
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
new file mode 100644
index 000000000000..176f3e19477a
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -0,0 +1,160 @@
+//=====-- PPCSubtarget.h - Define Subtarget for the PPC -------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCSUBTARGET_H
+#define POWERPCSUBTARGET_H
+
+#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+
+namespace PPC {
+ // -m directive values.
+ enum {
+ DIR_NONE,
+ DIR_32,
+ DIR_601,
+ DIR_602,
+ DIR_603,
+ DIR_7400,
+ DIR_750,
+ DIR_970,
+ DIR_64
+ };
+}
+
+class Module;
+class GlobalValue;
+class TargetMachine;
+
+class PPCSubtarget : public TargetSubtarget {
+public:
+ enum AsmWriterFlavorTy {
+ OldMnemonic, NewMnemonic, Unset
+ };
+protected:
+ const TargetMachine &TM;
+
+ /// StackAlignment - The minimum alignment of the stack frame known to hold on
+ /// entry to the function, and which must be maintained by every function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which cpu directive was used.
+ unsigned DarwinDirective;
+
+ /// AsmFlavor - Which PPC asm dialect to use.
+ AsmWriterFlavorTy AsmFlavor;
+
+ /// Used by the ISel to turn on optimizations for POWER4-derived architectures
+ bool IsGigaProcessor;
+ bool Has64BitSupport;
+ bool Use64BitRegs;
+ bool IsPPC64;
+ bool HasAltivec;
+ bool HasFSQRT;
+ bool HasSTFIWX;
+ bool HasLazyResolverStubs;
+
+ /// DarwinVers - Nonzero if this is a darwin platform; it holds the numeric
+ /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
+ unsigned char DarwinVers; // Is any darwin-ppc platform.
+public:
+ /// This constructor initializes the data members to match those of the
+ /// specified module.
+ ///
+ PPCSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS, bool is64Bit);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+ /// getStackAlignment - Returns the minimum alignment of the stack frame known
+ /// to hold on entry to the function, and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+ /// getDarwinDirective - Returns the -m directive specified for the cpu.
+ ///
+ unsigned getDarwinDirective() const { return DarwinDirective; }
+
+ /// getInstrItineraryData - Return the instruction itineraries based on
+ /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ /// getTargetDataString - Return the pointer size and type alignment
+ /// properties of this subtarget.
+ const char *getTargetDataString() const {
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128"
+ : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128";
+ }
+
+ /// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
+ ///
+ bool isPPC64() const { return IsPPC64; }
+
+ /// has64BitSupport - Return true if the selected CPU supports 64-bit
+ /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
+ bool has64BitSupport() const { return Has64BitSupport; }
+
+ /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
+ /// registers in 32-bit mode when possible. This can only be true if
+ /// has64BitSupport() returns true.
+ bool use64BitRegs() const { return Use64BitRegs; }
+
+ /// hasLazyResolverStub - Return true if accesses to the specified global have
+ /// to go through a dyld lazy resolution stub. This means that an extra load
+ /// is required to get the address of the global.
+ bool hasLazyResolverStub(const GlobalValue *GV) const;
+
+ // Specific obvious features.
+ bool hasFSQRT() const { return HasFSQRT; }
+ bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasAltivec() const { return HasAltivec; }
+ bool isGigaProcessor() const { return IsGigaProcessor; }
+
+ /// isDarwin - True if this is any darwin platform.
+ bool isDarwin() const { return DarwinVers != 0; }
+ /// isDarwin - True if this is darwin9 (leopard, 10.5) or above.
+ bool isDarwin9() const { return DarwinVers >= 9; }
+
+ /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
+ unsigned getDarwinVers() const { return DarwinVers; }
+
+ bool isMachoABI() const { return isDarwin() || IsPPC64; }
+ bool isELF32_ABI() const { return !isDarwin() && !IsPPC64; }
+
+ unsigned getAsmFlavor() const {
+ return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
+ }
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
new file mode 100644
index 000000000000..c69e591a6632
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
@@ -0,0 +1,161 @@
+//===-- PPCTargetAsmInfo.cpp - PPC asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the PPCDarwinTargetAsmInfo and
+// PPCLinuxTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetAsmInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+PPCDarwinTargetAsmInfo::PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM):
+ PPCTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
+ PCSymbol = ".";
+ CommentString = ";";
+ GlobalPrefix = "_";
+ PrivateGlobalPrefix = "L";
+ LessPrivateGlobalPrefix = "l";
+ StringConstantPrefix = "\1LC";
+ ConstantPoolSection = "\t.const\t";
+ JumpTableDataSection = ".const";
+ CStringSection = "\t.cstring";
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorsSection = ".constructor";
+ StaticDtorsSection = ".destructor";
+ } else {
+ StaticCtorsSection = ".mod_init_func";
+ StaticDtorsSection = ".mod_term_func";
+ }
+ HasSingleParameterDotFile = false;
+ SwitchToSectionDirective = "\t.section ";
+ UsedDirective = "\t.no_dead_strip\t";
+ WeakDefDirective = "\t.weak_definition ";
+ WeakRefDirective = "\t.weak_reference ";
+ HiddenDirective = "\t.private_extern ";
+ SupportsExceptionHandling = true;
+ NeedsIndirectEncoding = true;
+ NeedsSet = true;
+ BSSSection = 0;
+
+ DwarfEHFrameSection =
+ ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support";
+ DwarfExceptionSection = ".section __DATA,__gcc_except_tab";
+ GlobalEHDirective = "\t.globl\t";
+ SupportsWeakOmittedEHFrame = false;
+
+ DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+ DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+ DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+ DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+ DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+ DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+ DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+ DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+ DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+ DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+ DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+
+ // In non-PIC modes, emit a special label before jump tables so that the
+ // linker can perform more accurate dead code stripping.
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ // Emit a local label that is preserved until the linker runs.
+ JumpTableSpecialLabelPrefix = "l";
+ }
+}
+
+/// PreferredEHDataFormat - This hook allows the target to select data
+/// format used for encoding pointers in exception handling data. Reason is
+/// 0 for data, 1 for code labels, 2 for function pointers. Global is true
+/// if the symbol can be relocated.
+unsigned
+PPCDarwinTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ if (Reason == DwarfEncoding::Functions && Global)
+ return (DW_EH_PE_pcrel | DW_EH_PE_indirect | DW_EH_PE_sdata4);
+ else if (Reason == DwarfEncoding::CodeLabels || !Global)
+ return DW_EH_PE_pcrel;
+ else
+ return DW_EH_PE_absptr;
+}
+
+const char *
+PPCDarwinTargetAsmInfo::getEHGlobalPrefix() const
+{
+ const PPCSubtarget* Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ if (Subtarget->getDarwinVers() > 9)
+ return PrivateGlobalPrefix;
+ else
+ return "";
+}
+
+PPCLinuxTargetAsmInfo::PPCLinuxTargetAsmInfo(const PPCTargetMachine &TM) :
+ PPCTargetAsmInfo<ELFTargetAsmInfo>(TM) {
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ ConstantPoolSection = "\t.section .rodata.cst4\t";
+ JumpTableDataSection = ".section .rodata.cst4";
+ CStringSection = ".rodata.str";
+ StaticCtorsSection = ".section\t.ctors,\"aw\",@progbits";
+ StaticDtorsSection = ".section\t.dtors,\"aw\",@progbits";
+ UsedDirective = "\t# .no_dead_strip\t";
+ WeakRefDirective = "\t.weak\t";
+ BSSSection = "\t.section\t\".sbss\",\"aw\",@nobits";
+
+ // PPC/Linux normally uses named section for BSS.
+ BSSSection_ = getNamedSection("\t.bss",
+ SectionFlags::Writeable | SectionFlags::BSS,
+ /* Override */ true);
+
+ // Debug Information
+ AbsoluteDebugSectionOffsets = true;
+ SupportsDebugInformation = true;
+ DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",@progbits";
+ DwarfInfoSection = "\t.section\t.debug_info,\"\",@progbits";
+ DwarfLineSection = "\t.section\t.debug_line,\"\",@progbits";
+ DwarfFrameSection = "\t.section\t.debug_frame,\"\",@progbits";
+ DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"\",@progbits";
+ DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"\",@progbits";
+ DwarfStrSection = "\t.section\t.debug_str,\"\",@progbits";
+ DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
+ DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
+ DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
+ DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Exceptions handling
+ if (!TM.getSubtargetImpl()->isPPC64())
+ SupportsExceptionHandling = true;
+ AbsoluteEHSectionOffsets = false;
+ DwarfEHFrameSection = "\t.section\t.eh_frame,\"aw\",@progbits";
+ DwarfExceptionSection = "\t.section\t.gcc_except_table,\"a\",@progbits";
+}
+
+/// PreferredEHDataFormat - This hook allows the target to select data
+/// format used for encoding pointers in exception handling data. Reason is
+/// 0 for data, 1 for code labels, 2 for function pointers. Global is true
+/// if the symbol can be relocated.
+unsigned
+PPCLinuxTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ // We really need to write something here.
+ return TargetAsmInfo::PreferredEHDataFormat(Reason, Global);
+}
+
+// Instantiate default implementation.
+TEMPLATE_INSTANTIATION(class PPCTargetAsmInfo<TargetAsmInfo>);
diff --git a/lib/Target/PowerPC/PPCTargetAsmInfo.h b/lib/Target/PowerPC/PPCTargetAsmInfo.h
new file mode 100644
index 000000000000..edf40c9346cf
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetAsmInfo.h
@@ -0,0 +1,62 @@
+//=====-- PPCTargetAsmInfo.h - PPC asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PPC-specific TargetAsmInfo
+// classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETASMINFO_H
+#define PPCTARGETASMINFO_H
+
+#include "PPCTargetMachine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/DarwinTargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+ template <class BaseTAI>
+ struct PPCTargetAsmInfo : public BaseTAI {
+ explicit PPCTargetAsmInfo(const PPCTargetMachine &TM):
+ BaseTAI(TM) {
+ const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ bool isPPC64 = Subtarget->isPPC64();
+
+ BaseTAI::ZeroDirective = "\t.space\t";
+ BaseTAI::SetDirective = "\t.set";
+ BaseTAI::Data64bitsDirective = isPPC64 ? "\t.quad\t" : 0;
+ BaseTAI::AlignmentIsInBytes = false;
+ BaseTAI::LCOMMDirective = "\t.lcomm\t";
+ BaseTAI::InlineAsmStart = "# InlineAsm Start";
+ BaseTAI::InlineAsmEnd = "# InlineAsm End";
+ BaseTAI::AssemblerDialect = Subtarget->getAsmFlavor();
+ }
+ };
+
+ typedef PPCTargetAsmInfo<TargetAsmInfo> PPCGenericTargetAsmInfo;
+
+ EXTERN_TEMPLATE_INSTANTIATION(class PPCTargetAsmInfo<TargetAsmInfo>);
+
+ struct PPCDarwinTargetAsmInfo : public PPCTargetAsmInfo<DarwinTargetAsmInfo> {
+ explicit PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM);
+ virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const;
+ virtual const char *getEHGlobalPrefix() const;
+ };
+
+ struct PPCLinuxTargetAsmInfo : public PPCTargetAsmInfo<ELFTargetAsmInfo> {
+ explicit PPCLinuxTargetAsmInfo(const PPCTargetMachine &TM);
+ virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
new file mode 100644
index 000000000000..ef3f0fc04219
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -0,0 +1,250 @@
+//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCTargetAsmInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// PowerPCTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int PowerPCTargetMachineModule;
+int PowerPCTargetMachineModule = 0;
+
+// Register the targets
+static RegisterTarget<PPC32TargetMachine>
+X("ppc32", "PowerPC 32");
+static RegisterTarget<PPC64TargetMachine>
+Y("ppc64", "PowerPC 64");
+
+// No assembler printer by default
+PPCTargetMachine::AsmPrinterCtorFn PPCTargetMachine::AsmPrinterCtor = 0;
+
+const TargetAsmInfo *PPCTargetMachine::createTargetAsmInfo() const {
+ if (Subtarget.isDarwin())
+ return new PPCDarwinTargetAsmInfo(*this);
+ else
+ return new PPCLinuxTargetAsmInfo(*this);
+}
+
+unsigned PPC32TargetMachine::getJITMatchQuality() {
+#if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) || defined(__PPC__)
+ if (sizeof(void*) == 4)
+ return 10;
+#endif
+ return 0;
+}
+unsigned PPC64TargetMachine::getJITMatchQuality() {
+#if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) || defined(__PPC__)
+ if (sizeof(void*) == 8)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned PPC32TargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "powerpc-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 8 && std::string(TT.begin(), TT.begin()+8) == "powerpc-")
+ return 20;
+
+ // If the target triple is something non-powerpc, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::BigEndian &&
+ M.getPointerSize() == Module::Pointer32)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+unsigned PPC64TargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "powerpc64-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 10 && std::string(TT.begin(), TT.begin()+10) == "powerpc64-")
+ return 20;
+
+ if (M.getEndianness() == Module::BigEndian &&
+ M.getPointerSize() == Module::Pointer64)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+
+PPCTargetMachine::PPCTargetMachine(const Module &M, const std::string &FS,
+ bool is64Bit)
+ : Subtarget(*this, M, FS, is64Bit),
+ DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
+ FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()), MachOWriterInfo(*this) {
+
+ if (getRelocationModel() == Reloc::Default) {
+ if (Subtarget.isDarwin())
+ setRelocationModel(Reloc::DynamicNoPIC);
+ else
+ setRelocationModel(Reloc::Static);
+ }
+}
+
+/// Override this for PowerPC. Tail merging happily breaks up instruction issue
+/// groups, which typically degrades performance.
+bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
+
+PPC32TargetMachine::PPC32TargetMachine(const Module &M, const std::string &FS)
+ : PPCTargetMachine(M, FS, false) {
+}
+
+
+PPC64TargetMachine::PPC64TargetMachine(const Module &M, const std::string &FS)
+ : PPCTargetMachine(M, FS, true) {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool PPCTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createPPCISelDag(*this));
+ return false;
+}
+
+bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Must run branch selection immediately preceding the asm printer.
+ PM.add(createPPCBranchSelectionPass());
+ return false;
+}
+
+bool PPCTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+
+ return false;
+}
+
+bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE) {
+ // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64.
+ // FIXME: This should be moved to TargetJITInfo!!
+ if (Subtarget.isPPC64()) {
+ // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many
+ // instructions to materialize arbitrary global variable + function +
+ // constant pool addresses.
+ setRelocationModel(Reloc::PIC_);
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ DisableJumpTables = true;
+ } else {
+ setRelocationModel(Reloc::Static);
+ }
+
+ // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
+ // writing?
+ Subtarget.SetJITMode();
+
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCCodeEmitterPass(*this, MCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE) {
+ // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64.
+ // FIXME: This should be moved to TargetJITInfo!!
+ if (Subtarget.isPPC64()) {
+ // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many
+ // instructions to materialize arbitrary global variable + function +
+ // constant pool addresses.
+ setRelocationModel(Reloc::PIC_);
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ DisableJumpTables = true;
+ } else {
+ setRelocationModel(Reloc::Static);
+ }
+
+ // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
+ // writing?
+ Subtarget.SetJITMode();
+
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCJITCodeEmitterPass(*this, JCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE) {
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCCodeEmitterPass(*this, MCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE) {
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCJITCodeEmitterPass(*this, JCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
new file mode 100644
index 000000000000..086d2f4cf81d
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -0,0 +1,120 @@
+//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_TARGETMACHINE_H
+#define PPC_TARGETMACHINE_H
+
+#include "PPCFrameInfo.h"
+#include "PPCSubtarget.h"
+#include "PPCJITInfo.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
+#include "PPCMachOWriterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+class PassManager;
+class GlobalValue;
+
+/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
+///
+class PPCTargetMachine : public LLVMTargetMachine {
+ PPCSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ PPCInstrInfo InstrInfo;
+ PPCFrameInfo FrameInfo;
+ PPCJITInfo JITInfo;
+ PPCTargetLowering TLInfo;
+ InstrItineraryData InstrItins;
+ PPCMachOWriterInfo MachOWriterInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+ // To avoid having the target depend on the asmprinter libraries, the
+ // asmprinter sets this function pointer to its constructor at startup time
+ // if it is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ PPCTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
+public:
+ PPCTargetMachine(const Module &M, const std::string &FS, bool is64Bit);
+
+ virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const PPCFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
+ virtual PPCTargetLowering *getTargetLowering() const {
+ return const_cast<PPCTargetLowering*>(&TLInfo);
+ }
+ virtual const PPCRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual const InstrItineraryData getInstrItineraryData() const {
+ return InstrItins;
+ }
+ virtual const PPCMachOWriterInfo *getMachOWriterInfo() const {
+ return &MachOWriterInfo;
+ }
+
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE);
+ virtual bool getEnableTailMergeDefault() const;
+};
+
+/// PPC32TargetMachine - PowerPC 32-bit target machine.
+///
+class PPC32TargetMachine : public PPCTargetMachine {
+public:
+ PPC32TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+/// PPC64TargetMachine - PowerPC 64-bit target machine.
+///
+class PPC64TargetMachine : public PPCTargetMachine {
+public:
+ PPC64TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
new file mode 100644
index 000000000000..688fb3090803
--- /dev/null
+++ b/lib/Target/PowerPC/README.txt
@@ -0,0 +1,799 @@
+//===- README.txt - Notes for improving PowerPC-specific code gen ---------===//
+
+TODO:
+* gpr0 allocation
+* implement do-loop -> bdnz transform
+* lmw/stmw pass a la arm load store optimizer for prolog/epilog
+
+===-------------------------------------------------------------------------===
+
+Support 'update' load/store instructions. These are cracked on the G5, but are
+still a codesize win.
+
+With preinc enabled, this:
+
+long *%test4(long *%X, long *%dest) {
+ %Y = getelementptr long* %X, int 4
+ %A = load long* %Y
+ store long %A, long* %dest
+ ret long* %Y
+}
+
+compiles to:
+
+_test4:
+ mr r2, r3
+ lwzu r5, 32(r2)
+ lwz r3, 36(r3)
+ stw r5, 0(r4)
+ stw r3, 4(r4)
+ mr r3, r2
+ blr
+
+with -sched=list-burr, I get:
+
+_test4:
+ lwz r2, 36(r3)
+ lwzu r5, 32(r3)
+ stw r2, 4(r4)
+ stw r5, 0(r4)
+ blr
+
+===-------------------------------------------------------------------------===
+
+We compile the hottest inner loop of viterbi to:
+
+ li r6, 0
+ b LBB1_84 ;bb432.i
+LBB1_83: ;bb420.i
+ lbzx r8, r5, r7
+ addi r6, r7, 1
+ stbx r8, r4, r7
+LBB1_84: ;bb432.i
+ mr r7, r6
+ cmplwi cr0, r7, 143
+ bne cr0, LBB1_83 ;bb420.i
+
+The CBE manages to produce:
+
+ li r0, 143
+ mtctr r0
+loop:
+ lbzx r2, r2, r11
+ stbx r0, r2, r9
+ addi r2, r2, 1
+ bdz later
+ b loop
+
+This could be much better (bdnz instead of bdz) but it still beats us. If we
+produced this with bdnz, the loop would be a single dispatch group.
+
+===-------------------------------------------------------------------------===
+
+Compile:
+
+void foo(int *P) {
+ if (P) *P = 0;
+}
+
+into:
+
+_foo:
+ cmpwi cr0,r3,0
+ beqlr cr0
+ li r0,0
+ stw r0,0(r3)
+ blr
+
+This is effectively a simple form of predication.
+
+===-------------------------------------------------------------------------===
+
+Lump the constant pool for each function into ONE pic object, and reference
+pieces of it as offsets from the start. For functions like this (contrived
+to have lots of constants obviously):
+
+double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; }
+
+We generate:
+
+_X:
+ lis r2, ha16(.CPI_X_0)
+ lfd f0, lo16(.CPI_X_0)(r2)
+ lis r2, ha16(.CPI_X_1)
+ lfd f2, lo16(.CPI_X_1)(r2)
+ fmadd f0, f1, f0, f2
+ lis r2, ha16(.CPI_X_2)
+ lfd f1, lo16(.CPI_X_2)(r2)
+ lis r2, ha16(.CPI_X_3)
+ lfd f2, lo16(.CPI_X_3)(r2)
+ fmadd f1, f0, f1, f2
+ blr
+
+It would be better to materialize .CPI_X into a register, then use immediates
+off of the register to avoid the lis's. This is even more important in PIC
+mode.
+
+Note that this (and the static variable version) is discussed here for GCC:
+http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
+
+Here's another example (the sgn function):
+double testf(double a) {
+ return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0);
+}
+
+it produces a BB like this:
+LBB1_1: ; cond_true
+ lis r2, ha16(LCPI1_0)
+ lfs f0, lo16(LCPI1_0)(r2)
+ lis r2, ha16(LCPI1_1)
+ lis r3, ha16(LCPI1_2)
+ lfs f2, lo16(LCPI1_2)(r3)
+ lfs f3, lo16(LCPI1_1)(r2)
+ fsub f0, f0, f1
+ fsel f1, f0, f2, f3
+ blr
+
+===-------------------------------------------------------------------------===
+
+PIC Code Gen IPO optimization:
+
+Squish small scalar globals together into a single global struct, allowing the
+address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size
+of the GOT on targets with one).
+
+Note that this is discussed here for GCC:
+http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
+
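+A minimal C sketch of the idea (variable names are made up for illustration):
+instead of three separate globals, each of which needs its own address
+materialization in PIC code,
+
+static int width, height, depth;
+
+group them into one aggregate whose base address can be computed once and
+CSE'd, with the members reached by small constant offsets:
+
+static struct { int width, height, depth; } dims;
+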
+===-------------------------------------------------------------------------===
+
+Implement the Newton-Raphson method for improving estimate instructions to the
+correct accuracy, and implementing divide as multiply by reciprocal when it has
+more than one use. Itanium will want this too.
+
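+As a rough sketch (illustrative C, not the actual lowering), one refinement
+step for a reciprocal estimate 'est' of 1/b looks like:
+
+double refine_recip(double b, double est) {
+  /* Newton-Raphson for f(x) = 1/x - b: x1 = x0 * (2 - b*x0).  Each step
+     roughly doubles the number of correct bits in the estimate. */
+  return est * (2.0 - b * est);
+}
+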
+===-------------------------------------------------------------------------===
+
+Compile offsets from allocas:
+
+int *%test() {
+ %X = alloca { int, int }
+ %Y = getelementptr {int,int}* %X, int 0, uint 1
+ ret int* %Y
+}
+
+into a single add, not two:
+
+_test:
+ addi r2, r1, -8
+ addi r3, r2, 4
+ blr
+
+--> important for C++.
+
+===-------------------------------------------------------------------------===
+
+No loads or stores of the constants should be needed:
+
+struct foo { double X, Y; };
+void xxx(struct foo F);
+void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); }
+
+===-------------------------------------------------------------------------===
+
+Darwin Stub LICM optimization:
+
+Loops like this:
+
+ for (...) bar();
+
+Have to go through an indirect stub if bar is external or linkonce. It would
+be better to compile it as:
+
+ fp = &bar;
+ for (...) fp();
+
+which only computes the address of bar once (instead of each time through the
+stub). This is Darwin specific and would have to be done in the code generator.
+Probably not a win on x86.
+
+===-------------------------------------------------------------------------===
+
+Simple IPO for argument passing, change:
+ void foo(int X, double Y, int Z) -> void foo(int X, int Z, double Y)
+
+the Darwin ABI specifies that any integer arguments in the first 32 bytes worth
+of arguments get assigned to r3 through r10. That is, if you have a function
+foo(int, double, int) you get r3, f1, r6, since the 64 bit double ate up the
+argument bytes for r4 and r5. The trick then would be to shuffle the argument
+order for functions we can internalize so that the maximum number of
+integers/pointers get passed in regs before you see any of the fp arguments.
+
+Instead of implementing this, it would actually probably be easier to just
+implement a PPC fastcc, where we could do whatever we wanted to the CC,
+including having this work sanely.
+
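+A concrete sketch of the shuffle described above (hypothetical function,
+register assignments per the rules just stated):
+
+void foo (int X, double Y, int Z);   /* X -> r3, Y -> f1, Z -> r6 */
+void foo2(int X, int Z, double Y);   /* X -> r3, Z -> r4, Y -> f1 */
+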
+===-------------------------------------------------------------------------===
+
+Fix Darwin FP-In-Integer Registers ABI
+
+Darwin passes doubles in structures in integer registers, which is very very
+bad. Add something like a BIT_CONVERT to LLVM, then do an i-p transformation
+that percolates these things out of functions.
+
+Check out how horrible this is:
+http://gcc.gnu.org/ml/gcc/2005-10/msg01036.html
+
+This is an extension of "interprocedural CC unmunging" that can't be done with
+just fastcc.
+
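+For illustration (hypothetical type, not taken from the thread above): a
+by-value argument like
+
+struct pt { double x, y; };
+void take(struct pt p);
+
+has its doubles passed in GPRs on 32-bit Darwin, forcing the values through
+memory instead of keeping them in FPRs.
+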
+===-------------------------------------------------------------------------===
+
+Compile this:
+
+int foo(int a) {
+ int b = (a < 8);
+ if (b) {
+ return b * 3; // ignore the fact that this is always 3.
+ } else {
+ return 2;
+ }
+}
+
+into something not this:
+
+_foo:
+1) cmpwi cr7, r3, 8
+ mfcr r2, 1
+ rlwinm r2, r2, 29, 31, 31
+1) cmpwi cr0, r3, 7
+ bgt cr0, LBB1_2 ; UnifiedReturnBlock
+LBB1_1: ; then
+ rlwinm r2, r2, 0, 31, 31
+ mulli r3, r2, 3
+ blr
+LBB1_2: ; UnifiedReturnBlock
+ li r3, 2
+ blr
+
+In particular, the two compares (marked 1) could be shared by reversing one.
+This could be done in the dag combiner, by swapping a BR_CC when a SETCC of the
+same operands (but backwards) exists. In this case, this wouldn't save us
+anything though, because the compares still wouldn't be shared.
+
+===-------------------------------------------------------------------------===
+
+We should custom expand setcc instead of pretending that we have it. That
+would allow us to expose the access of the crbit after the mfcr, allowing
+that access to be trivially folded into other ops. A simple example:
+
+int foo(int a, int b) { return (a < b) << 4; }
+
+compiles into:
+
+_foo:
+ cmpw cr7, r3, r4
+ mfcr r2, 1
+ rlwinm r2, r2, 29, 31, 31
+ slwi r3, r2, 4
+ blr
+
+===-------------------------------------------------------------------------===
+
+Fold add and sub with constant into non-extern, non-weak addresses so this:
+
+static int a;
+void bar(int b) { a = b; }
+void foo(unsigned char *c) {
+ *c = a;
+}
+
+So that
+
+_foo:
+ lis r2, ha16(_a)
+ la r2, lo16(_a)(r2)
+ lbz r2, 3(r2)
+ stb r2, 0(r3)
+ blr
+
+Becomes
+
+_foo:
+ lis r2, ha16(_a+3)
+ lbz r2, lo16(_a+3)(r2)
+ stb r2, 0(r3)
+ blr
+
+===-------------------------------------------------------------------------===
+
+We generate really bad code for this:
+
+int f(signed char *a, _Bool b, _Bool c) {
+ signed char t = 0;
+ if (b) t = *a;
+ if (c) *a = t;
+}
+
+===-------------------------------------------------------------------------===
+
+This:
+int test(unsigned *P) { return *P >> 24; }
+
+Should compile to:
+
+_test:
+ lbz r3,0(r3)
+ blr
+
+not:
+
+_test:
+ lwz r2, 0(r3)
+ srwi r3, r2, 24
+ blr
+
+===-------------------------------------------------------------------------===
+
+On the G5, logical CR operations are more expensive in their three
+address form: ops that read/write the same register are half as expensive as
+those that read from two registers that are different from their destination.
+
+We should model this with two separate instructions. The isel should generate
+the "two address" form of the instructions. When the register allocator
+detects that it needs to insert a copy due to the two-addressness of the CR
+logical op, it will invoke PPCInstrInfo::convertToThreeAddress. At this point
+we can convert to the "three address" instruction, to save code space.
+
+This only matters when we start generating cr logical ops.
+
+===-------------------------------------------------------------------------===
+
+We should compile these two functions to the same thing:
+
+#include <stdlib.h>
+void f(int a, int b, int *P) {
+ *P = (a-b)>=0?(a-b):(b-a);
+}
+void g(int a, int b, int *P) {
+ *P = abs(a-b);
+}
+
+Further, they should compile to something better than:
+
+_g:
+ subf r2, r4, r3
+ subfic r3, r2, 0
+ cmpwi cr0, r2, -1
+ bgt cr0, LBB2_2 ; entry
+LBB2_1: ; entry
+ mr r2, r3
+LBB2_2: ; entry
+ stw r2, 0(r5)
+ blr
+
+GCC produces:
+
+_g:
+ subf r4,r4,r3
+ srawi r2,r4,31
+ xor r0,r2,r4
+ subf r0,r2,r0
+ stw r0,0(r5)
+ blr
+
+... which is much nicer.
+
+This theoretically may help improve twolf slightly (used in dimbox.c:142?).
+
+===-------------------------------------------------------------------------===
+
+int foo(int N, int ***W, int **TK, int X) {
+ int t, i;
+
+ for (t = 0; t < N; ++t)
+ for (i = 0; i < 4; ++i)
+ W[t / X][i][t % X] = TK[i][t];
+
+ return 5;
+}
+
+We generate relatively atrocious code for this loop compared to gcc.
+
+We could also strength reduce the rem and the div:
+http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf
+
+===-------------------------------------------------------------------------===
+
+float foo(float X) { return (int)(X); }
+
+Currently produces:
+
+_foo:
+ fctiwz f0, f1
+ stfd f0, -8(r1)
+ lwz r2, -4(r1)
+ extsw r2, r2
+ std r2, -16(r1)
+ lfd f0, -16(r1)
+ fcfid f0, f0
+ frsp f1, f0
+ blr
+
+We could use a target dag combine to turn the lwz/extsw into an lwa when the
+lwz has a single use. Since LWA is cracked anyway, this would be a codesize
+win only.
+
+===-------------------------------------------------------------------------===
+
+We generate ugly code for this:
+
+void func(unsigned int *ret, float dx, float dy, float dz, float dw) {
+ unsigned code = 0;
+ if(dx < -dw) code |= 1;
+ if(dx > dw) code |= 2;
+ if(dy < -dw) code |= 4;
+ if(dy > dw) code |= 8;
+ if(dz < -dw) code |= 16;
+ if(dz > dw) code |= 32;
+ *ret = code;
+}
+
+===-------------------------------------------------------------------------===
+
+Complete the signed i32 to FP conversion code using 64-bit registers
+transformation, good for PI. See PPCISelLowering.cpp, this comment:
+
+ // FIXME: disable this lowered code. This generates 64-bit register values,
+ // and we don't model the fact that the top part is clobbered by calls. We
+ // need to flag these together so that the value isn't live across a call.
+ //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+
+Also, if the registers are spilled to the stack, we have to ensure that all
+64-bits of them are save/restored, otherwise we will miscompile the code. It
+sounds like we need to get the 64-bit register classes going.
+
+===-------------------------------------------------------------------------===
+
+%struct.B = type { i8, [3 x i8] }
+
+define void @bar(%struct.B* %b) {
+entry:
+ %tmp = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]
+ %tmp = load i32* %tmp ; <uint> [#uses=1]
+ %tmp3 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]
+ %tmp4 = load i32* %tmp3 ; <uint> [#uses=1]
+ %tmp8 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=2]
+ %tmp9 = load i32* %tmp8 ; <uint> [#uses=1]
+ %tmp4.mask17 = shl i32 %tmp4, i8 1 ; <uint> [#uses=1]
+ %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; <uint> [#uses=1]
+ %tmp.masked = and i32 %tmp, 2147483648 ; <uint> [#uses=1]
+ %tmp11 = or i32 %tmp1415, %tmp.masked ; <uint> [#uses=1]
+ %tmp12 = and i32 %tmp9, 2147483647 ; <uint> [#uses=1]
+ %tmp13 = or i32 %tmp12, %tmp11 ; <uint> [#uses=1]
+ store i32 %tmp13, i32* %tmp8
+ ret void
+}
+
+We emit:
+
+_foo:
+ lwz r2, 0(r3)
+ slwi r4, r2, 1
+ or r4, r4, r2
+ rlwimi r2, r4, 0, 0, 0
+ stw r2, 0(r3)
+ blr
+
+We could collapse a bunch of those ORs and ANDs and generate the following
+equivalent code:
+
+_foo:
+ lwz r2, 0(r3)
+ rlwinm r4, r2, 1, 0, 0
+ or r2, r2, r4
+ stw r2, 0(r3)
+ blr
+
+===-------------------------------------------------------------------------===
+
+We compile:
+
+unsigned test6(unsigned x) {
+ return ((x & 0x00FF0000) >> 16) | ((x & 0x000000FF) << 16);
+}
+
+into:
+
+_test6:
+ lis r2, 255
+ rlwinm r3, r3, 16, 0, 31
+ ori r2, r2, 255
+ and r3, r3, r2
+ blr
+
+GCC gets it down to:
+
+_test6:
+ rlwinm r0,r3,16,8,15
+ rlwinm r3,r3,16,24,31
+ or r3,r3,r0
+ blr
+
+
+===-------------------------------------------------------------------------===
+
+Consider a function like this:
+
+float foo(float X) { return X + 1234.4123f; }
+
+The FP constant ends up in the constant pool, so we need to get the LR
+register. This ends up producing code like this:
+
+_foo:
+.LBB_foo_0: ; entry
+ mflr r11
+*** stw r11, 8(r1)
+ bl "L00000$pb"
+"L00000$pb":
+ mflr r2
+ addis r2, r2, ha16(.CPI_foo_0-"L00000$pb")
+ lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2)
+ fadds f1, f1, f0
+*** lwz r11, 8(r1)
+ mtlr r11
+ blr
+
+This is functional, but there is no reason to spill the LR register all the way
+to the stack (the two marked instrs): spilling it to a GPR is quite enough.
+
+Implementing this will require some codegen improvements. Nate writes:
+
+"So basically what we need to support the "no stack frame save and restore" is a
+generalization of the LR optimization to "callee-save regs".
+
+Currently, we have LR marked as a callee-save reg. The register allocator sees
+that it's callee save, and spills it directly to the stack.
+
+Ideally, something like this would happen:
+
+LR would be in a separate register class from the GPRs. The class of LR would be
+marked "unspillable". When the register allocator came across an unspillable
+reg, it would ask "what is the best class to copy this into that I *can* spill"
+If it gets a class back, which it will in this case (the gprs), it grabs a free
+register of that class. If it is then later necessary to spill that reg, so be
+it.
+
+===-------------------------------------------------------------------------===
+
+We compile this:
+int test(_Bool X) {
+ return X ? 524288 : 0;
+}
+
+to:
+_test:
+ cmplwi cr0, r3, 0
+ lis r2, 8
+ li r3, 0
+ beq cr0, LBB1_2 ;entry
+LBB1_1: ;entry
+ mr r3, r2
+LBB1_2: ;entry
+ blr
+
+instead of:
+_test:
+ addic r2,r3,-1
+ subfe r0,r2,r3
+ slwi r3,r0,19
+ blr
+
+This sort of thing occurs a lot due to globalopt.
+
+===-------------------------------------------------------------------------===
+
+We currently compile 32-bit bswap:
+
+declare i32 @llvm.bswap.i32(i32 %A)
+define i32 @test(i32 %A) {
+ %B = call i32 @llvm.bswap.i32(i32 %A)
+ ret i32 %B
+}
+
+to:
+
+_test:
+ rlwinm r2, r3, 24, 16, 23
+ slwi r4, r3, 24
+ rlwimi r2, r3, 8, 24, 31
+ rlwimi r4, r3, 8, 8, 15
+ rlwimi r4, r2, 0, 16, 31
+ mr r3, r4
+ blr
+
+it would be more efficient to produce:
+
+_foo: mr r0,r3
+ rlwinm r3,r3,8,0xffffffff
+ rlwimi r3,r0,24,0,7
+ rlwimi r3,r0,24,16,23
+ blr
+
+===-------------------------------------------------------------------------===
+
+test/CodeGen/PowerPC/2007-03-24-cntlzd.ll compiles to:
+
+__ZNK4llvm5APInt17countLeadingZerosEv:
+ ld r2, 0(r3)
+ cntlzd r2, r2
+ or r2, r2, r2 <<-- silly.
+ addi r3, r2, -64
+ blr
+
+The dead or is a 'truncate' from 64- to 32-bits.
+
+===-------------------------------------------------------------------------===
+
+We generate horrible ppc code for this:
+
+#define N 2000000
+double a[N],c[N];
+void simpleloop() {
+ int j;
+ for (j=0; j<N; j++)
+ c[j] = a[j];
+}
+
+LBB1_1: ;bb
+ lfdx f0, r3, r4
+ addi r5, r5, 1 ;; Extra IV for the exit value compare.
+ stfdx f0, r2, r4
+ addi r4, r4, 8
+
+ xoris r6, r5, 30 ;; This is due to a large immediate.
+ cmplwi cr0, r6, 33920
+ bne cr0, LBB1_1
+
+//===---------------------------------------------------------------------===//
+
+This:
+ #include <algorithm>
+ inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b)
+ { return std::make_pair(a + b, a + b < a); }
+ bool no_overflow(unsigned a, unsigned b)
+ { return !full_add(a, b).second; }
+
+Should compile to:
+
+__Z11no_overflowjj:
+ add r4,r3,r4
+ subfc r3,r3,r4
+ li r3,0
+ adde r3,r3,r3
+ blr
+
+(or better) not:
+
+__Z11no_overflowjj:
+ add r2, r4, r3
+ cmplw cr7, r2, r3
+ mfcr r2
+ rlwinm r2, r2, 29, 31, 31
+ xori r3, r2, 1
+ blr
+
+//===---------------------------------------------------------------------===//
+
+We compile some FP comparisons into an mfcr with two rlwinms and an or. For
+example:
+#include <math.h>
+int test(double x, double y) { return islessequal(x, y);}
+int test2(double x, double y) { return islessgreater(x, y);}
+int test3(double x, double y) { return !islessequal(x, y);}
+
+Compiles into (all three are similar, but the bits differ):
+
+_test:
+ fcmpu cr7, f1, f2
+ mfcr r2
+ rlwinm r3, r2, 29, 31, 31
+ rlwinm r2, r2, 31, 31, 31
+ or r3, r2, r3
+ blr
+
+GCC compiles this into:
+
+ _test:
+ fcmpu cr7,f1,f2
+ cror 30,28,30
+ mfcr r3
+ rlwinm r3,r3,31,1
+ blr
+
+which is more efficient and can use mfocr. See PR642 for some more context.
+
+//===---------------------------------------------------------------------===//
+
+void foo(float *data, float d) {
+ long i;
+ for (i = 0; i < 8000; i++)
+ data[i] = d;
+}
+void foo2(float *data, float d) {
+ long i;
+ data--;
+ for (i = 0; i < 8000; i++) {
+ data[1] = d;
+ data++;
+ }
+}
+
+These compile to:
+
+_foo:
+ li r2, 0
+LBB1_1: ; bb
+ addi r4, r2, 4
+ stfsx f1, r3, r2
+ cmplwi cr0, r4, 32000
+ mr r2, r4
+ bne cr0, LBB1_1 ; bb
+ blr
+_foo2:
+ li r2, 0
+LBB2_1: ; bb
+ addi r4, r2, 4
+ stfsx f1, r3, r2
+ cmplwi cr0, r4, 32000
+ mr r2, r4
+ bne cr0, LBB2_1 ; bb
+ blr
+
+The 'mr' could be eliminated by folding the add into the cmp.
+
+//===---------------------------------------------------------------------===//
+Codegen for the following (low-probability) case deteriorated considerably
+when the correctness fixes for unordered comparisons went in (PR 642, 58871).
+It should be possible to recover the code quality described in the comments.
+
+; RUN: llvm-as < %s | llc -march=ppc32 | grep or | count 3
+; This should produce one 'or' or 'cror' instruction per function.
+
+; RUN: llvm-as < %s | llc -march=ppc32 | grep mfcr | count 3
+; PR2964
+
+define i32 @test(double %x, double %y) nounwind {
+entry:
+ %tmp3 = fcmp ole double %x, %y ; <i1> [#uses=1]
+ %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp345
+}
+
+define i32 @test2(double %x, double %y) nounwind {
+entry:
+ %tmp3 = fcmp one double %x, %y ; <i1> [#uses=1]
+ %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp345
+}
+
+define i32 @test3(double %x, double %y) nounwind {
+entry:
+ %tmp3 = fcmp ugt double %x, %y ; <i1> [#uses=1]
+ %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp34
+}
+//===----------------------------------------------------------------------===//
+; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg
+
+; This could generate FSEL with appropriate flags (FSEL is not IEEE-safe, and
+; should not be generated except with -enable-finite-only-fp-math or the like).
+; With the correctness fixes for PR642 (58871) LowerSELECT_CC would need to
+; recognize a more elaborate tree than a simple SETxx.
+
+define double @test_FNEG_sel(double %A, double %B, double %C) {
+ %D = sub double -0.000000e+00, %A ; <double> [#uses=1]
+ %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1]
+ %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1]
+ ret double %E
+}
+
diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt
new file mode 100644
index 000000000000..1e4c6fb98440
--- /dev/null
+++ b/lib/Target/PowerPC/README_ALTIVEC.txt
@@ -0,0 +1,211 @@
+//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===//
+
+Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector
+registers, to generate better spill code.
+
+//===----------------------------------------------------------------------===//
+
+The first should be a single lvx from the constant pool, the second should be
+a xor/stvx:
+
+void foo(void) {
+ int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 };
+ bar (x);
+}
+
+#include <string.h>
+void foo(void) {
+ int x[8] __attribute__((aligned(128)));
+ memset (x, 0, sizeof (x));
+ bar (x);
+}
+
+//===----------------------------------------------------------------------===//
+
+Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763
+
+When -ffast-math is on, we can use 0.0.
+
+//===----------------------------------------------------------------------===//
+
+ Consider this:
+ v4f32 Vector;
+ v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X };
+
+Since we know that "Vector" is 16-byte aligned and we know the element offset
+of ".X", we should change the load into a lve*x instruction, instead of doing
+a load/store/lve*x sequence.
+
+//===----------------------------------------------------------------------===//
+
+For functions that use altivec AND have calls, we are VRSAVE'ing all call
+clobbered regs.
+
+//===----------------------------------------------------------------------===//
+
+Implement passing vectors by value into calls and receiving them as arguments.
+
+//===----------------------------------------------------------------------===//
+
+GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load
+of C1/C2/C3, then a load and vperm of Variable.
+
+//===----------------------------------------------------------------------===//
+
+We need a way to teach tblgen that some operands of an intrinsic are required to
+be constants. The verifier should enforce this constraint.
+
+//===----------------------------------------------------------------------===//
+
+We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte
+aligned stack slot, followed by a load/vperm. We should probably just store it
+to a scalar stack slot, then use lvsl/vperm to load it. If the value is already
+in memory this is a big win.
+
+//===----------------------------------------------------------------------===//
+
+extract_vector_elt of an arbitrary constant vector can be done with the
+following instructions:
+
+vTemp = vec_splat(v0,2); // 2 is the element the src is in.
+vec_ste(&destloc,0,vTemp);
+
+We can do an arbitrary non-constant value by using lvsr/perm/ste.
+
+//===----------------------------------------------------------------------===//
+
+If we want to tie instruction selection into the scheduler, we can do some
+constant formation with different instructions. For example, we can generate
+"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with
+"vsplti 0" or "vxor", each of which use different execution units, thus could
+help scheduling.
+
+This is probably only reasonable for a post-pass scheduler.
+
+//===----------------------------------------------------------------------===//
+
+For this function:
+
+void test(vector float *A, vector float *B) {
+ vector float C = (vector float)vec_cmpeq(*A, *B);
+ if (!vec_any_eq(*A, *B))
+ *B = (vector float){0,0,0,0};
+ *A = C;
+}
+
+we get the following basic block:
+
+ ...
+ lvx v2, 0, r4
+ lvx v3, 0, r3
+ vcmpeqfp v4, v3, v2
+ vcmpeqfp. v2, v3, v2
+ bne cr6, LBB1_2 ; cond_next
+
+The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the
+vcmpeqfp. result is used by a branch. This can be improved.
+
+//===----------------------------------------------------------------------===//
+
+The code generated for this is truly awful:
+
+vector float test(float a, float b) {
+ return (vector float){ 0.0, a, 0.0, 0.0};
+}
+
+LCPI1_0: ; float
+ .space 4
+ .text
+ .globl _test
+ .align 4
+_test:
+ mfspr r2, 256
+ oris r3, r2, 4096
+ mtspr 256, r3
+ lis r3, ha16(LCPI1_0)
+ addi r4, r1, -32
+ stfs f1, -16(r1)
+ addi r5, r1, -16
+ lfs f0, lo16(LCPI1_0)(r3)
+ stfs f0, -32(r1)
+ lvx v2, 0, r4
+ lvx v3, 0, r5
+ vmrghw v3, v3, v2
+ vspltw v2, v2, 0
+ vmrghw v2, v2, v3
+ mtspr 256, r2
+ blr
+
+//===----------------------------------------------------------------------===//
+
+int foo(vector float *x, vector float *y) {
+ if (vec_all_eq(*x,*y)) return 3245;
+ else return 12;
+}
+
+A predicate compare being used in a select_cc should have the same peephole
+applied to it as a predicate compare used by a br_cc. There should be no
+mfcr here:
+
+_foo:
+ mfspr r2, 256
+ oris r5, r2, 12288
+ mtspr 256, r5
+ li r5, 12
+ li r6, 3245
+ lvx v2, 0, r4
+ lvx v3, 0, r3
+ vcmpeqfp. v2, v3, v2
+ mfcr r3, 2
+ rlwinm r3, r3, 25, 31, 31
+ cmpwi cr0, r3, 0
+ bne cr0, LBB1_2 ; entry
+LBB1_1: ; entry
+ mr r6, r5
+LBB1_2: ; entry
+ mr r3, r6
+ mtspr 256, r2
+ blr
+
+//===----------------------------------------------------------------------===//
+
+CodeGen/PowerPC/vec_constants.ll has an and operation that should be
+codegen'd to andc. The issue is that the 'all ones' build vector is
+SelectNodeTo'd to a VSPLTISB instruction node before the and/xor is selected,
+which prevents the vnot pattern from matching.
+
+
+//===----------------------------------------------------------------------===//
+
+An alternative to the store/store/load approach for illegal insert element
+lowering would be:
+
+1. store element to any ol' slot
+2. lvx the slot
+3. lvsl 0; splat index; vcmpeq to generate a select mask
+4. lvsl slot + x; vperm to rotate result into correct slot
+5. vsel result together.
+
+//===----------------------------------------------------------------------===//
+
+Should codegen branches on vec_any/vec_all to avoid mfcr. Two examples:
+
+#include <altivec.h>
+int f(vector float a, vector float b) {
+  int aa = 0;
+  if (vec_all_ge(a, b))
+    aa |= 0x1;
+  if (vec_any_ge(a, b))
+    aa |= 0x2;
+  return aa;
+}
+
+vector float f(vector float a, vector float b) {
+ if (vec_any_eq(a, b))
+ return a;
+ else
+ return b;
+}
+
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
new file mode 100644
index 000000000000..f68cf0e40df0
--- /dev/null
+++ b/lib/Target/README.txt
@@ -0,0 +1,1679 @@
+Target Independent Opportunities:
+
+//===---------------------------------------------------------------------===//
+
+With the recent changes to make the implicit def/use set explicit in
+machineinstrs, we should change the target descriptions for 'call' instructions
+so that the .td files don't list all the call-clobbered registers as implicit
+defs. Instead, these should be added by the code generator (e.g. on the dag).
+
+This has a number of uses:
+
+1. PPC32/64 and X86 32/64 can avoid having multiple copies of call instructions
+ for their different impdef sets.
+2. Targets with multiple calling convs (e.g. x86) which have different clobber
+ sets don't need copies of call instructions.
+3. 'Interprocedural register allocation' can be done to reduce the clobber sets
+ of calls.
+
+//===---------------------------------------------------------------------===//
+
+Make the PPC branch selector target independent
+
+//===---------------------------------------------------------------------===//
+
+Get the C front-end to expand hypot(x,y) -> llvm.sqrt(x*x+y*y) when errno and
+precision don't matter (-ffast-math). Misc/mandel will like this. :) This isn't
+safe in general, even on darwin. See the libm implementation of hypot for
+examples (which special case when x/y are exactly zero to get signed zeros etc
+right).
+
+//===---------------------------------------------------------------------===//
+
+Solve this DAG isel folding deficiency:
+
+int X, Y;
+
+void fn1(void)
+{
+ X = X | (Y << 3);
+}
+
+compiles to
+
+fn1:
+ movl Y, %eax
+ shll $3, %eax
+ orl X, %eax
+ movl %eax, X
+ ret
+
+The problem is the store's chain operand is not the load X but rather
+a TokenFactor of the load X and load Y, which prevents the folding.
+
+There are two ways to fix this:
+
+1. The dag combiner can start using alias analysis to realize that y/x
+ don't alias, making the store to X not dependent on the load from Y.
+2. The generated isel could be made smarter in the case it can't
+ disambiguate the pointers.
+
+Number 1 is the preferred solution.
+
+This has been "fixed" by a TableGen hack. But that is a short term workaround
+which will be removed once the proper fix is made.
+
+//===---------------------------------------------------------------------===//
+
+On targets with expensive 64-bit multiply, we could LSR this:
+
+for (i = ...; ++i) {
+  x = 1ULL << i;
+}
+
+into:
+
+long long tmp = 1;
+for (i = ...; ++i, tmp += tmp)
+  x = tmp;
+
+This would be a win on ppc32, but not x86 or ppc64.
+
+//===---------------------------------------------------------------------===//
+
+Shrink: (setlt (loadi32 P), 0) -> (setlt (loadi8 Phi), 0)
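+
+A C sketch of the idea (the byte offset 3 is an assumption that holds for a
+little-endian layout; on a big-endian target the sign byte is at offset 0):
+a sign test of a 32-bit load only needs the byte holding the sign bit.
+
+int is_negative_wide(int *P)   { return *P < 0; }
+int is_negative_narrow(int *P) {
+  /* load only the byte that contains the sign bit */
+  return ((signed char *)P)[3] < 0;
+}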
+
+//===---------------------------------------------------------------------===//
+
+Reassociate should turn: X*X*X*X -> t=(X*X) (t*t) to eliminate a multiply.
+
+//===---------------------------------------------------------------------===//
+
+Interesting? testcase for add/shift/mul reassoc:
+
+int bar(int x, int y) {
+ return x*x*x+y+x*x*x*x*x*y*y*y*y;
+}
+int foo(int z, int n) {
+ return bar(z, n) + bar(2*z, 2*n);
+}
+
+Reassociate should handle the example in GCC PR16157.
+
+//===---------------------------------------------------------------------===//
+
+These two functions should generate the same code on big-endian systems:
+
+int g(int *j,int *l) { return memcmp(j,l,4); }
+int h(int *j, int *l) { return *j - *l; }
+
+this could be done in SelectionDAGISel.cpp, along with other special cases,
+for 1,2,4,8 bytes.
+
+//===---------------------------------------------------------------------===//
+
+It would be nice to revert this patch:
+http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html
+
+And teach the dag combiner enough to simplify the code expanded before
+legalize. It seems plausible that this knowledge would let it simplify other
+stuff too.
+
+//===---------------------------------------------------------------------===//
+
+For vector types, TargetData.cpp::getTypeInfo() returns alignment that is equal
+to the type size. It works but can be overly conservative as the alignment of
+specific vector types is target dependent.
+
+//===---------------------------------------------------------------------===//
+
+We should produce an unaligned load from code like this:
+
+v4sf example(float *P) {
+ return (v4sf){P[0], P[1], P[2], P[3] };
+}
+
+//===---------------------------------------------------------------------===//
+
+Add support for conditional increments, and other related patterns. Instead
+of:
+
+ movl 136(%esp), %eax
+ cmpl $0, %eax
+ je LBB16_2 #cond_next
+LBB16_1: #cond_true
+ incl _foo
+LBB16_2: #cond_next
+
+emit:
+ movl _foo, %eax
+ cmpl $1, %edi
+ sbbl $-1, %eax
+ movl %eax, _foo
+
+//===---------------------------------------------------------------------===//
+
+Combine: a = sin(x), b = cos(x) into a,b = sincos(x).
+
+Expand these to calls of sin/cos and stores:
+ double sincos(double x, double *sin, double *cos);
+ float sincosf(float x, float *sin, float *cos);
+ long double sincosl(long double x, long double *sin, long double *cos);
+
+Doing so could allow SROA of the destination pointers. See also:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17687
+
+This is now easily doable with MRVs. We could even make an intrinsic for this
+if anyone cared enough about sincos.
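+
+A sketch of the first direction (sincos here is the non-standard GNU libm
+extension, so its availability is an assumption):
+
+#define _GNU_SOURCE
+#include <math.h>
+
+/* two separate libcalls ... */
+void before(double x, double *s, double *c) { *s = sin(x); *c = cos(x); }
+
+/* ... merged into a single sincos call */
+void after(double x, double *s, double *c) { sincos(x, s, c); }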
+
+//===---------------------------------------------------------------------===//
+
+Turn this into a single byte store with no load (the other 3 bytes are
+unmodified):
+
+define void @test(i32* %P) {
+ %tmp = load i32* %P
+ %tmp14 = or i32 %tmp, 3305111552
+ %tmp15 = and i32 %tmp14, 3321888767
+ store i32 %tmp15, i32* %P
+ ret void
+}
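+
+In C terms, the or/and pair (0xC5000000 / 0xC5FFFFFF) forces the top byte to
+0xC5 and leaves the other three bytes alone, so the whole thing is a one-byte
+store. A sketch, assuming a little-endian layout where that byte sits at
+offset 3:
+
+void test_bytestore(unsigned *P) {
+  ((unsigned char *)P)[3] = 0xC5;  /* the other 3 bytes are unmodified */
+}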
+
+//===---------------------------------------------------------------------===//
+
+dag/inst combine "clz(x)>>5 -> x==0" for 32-bit x.
+
+Compile:
+
+int bar(int x)
+{
+ int t = __builtin_clz(x);
+ return -(t>>5);
+}
+
+to:
+
+_bar: addic r3,r3,-1
+ subfe r3,r3,r3
+ blr
+
+//===---------------------------------------------------------------------===//
+
+Legalize should lower cttz like this:
+  cttz(x) = popcnt((x-1) & ~x)
+
+on targets that have popcnt but not cttz. Itanium, what else?
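+
+A quick sanity check of the identity (a sketch; __builtin_popcount and
+__builtin_ctz are GCC/Clang builtins): (x-1) & ~x is a mask of exactly the
+trailing zero bits, so its population count is the trailing zero count.
+
+unsigned cttz_via_popcnt(unsigned x) {
+  /* equals __builtin_ctz(x) for any x != 0 */
+  return __builtin_popcount((x - 1) & ~x);
+}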
+
+//===---------------------------------------------------------------------===//
+
+quantum_sigma_x in 462.libquantum contains the following loop:
+
+ for(i=0; i<reg->size; i++)
+ {
+ /* Flip the target bit of each basis state */
+ reg->node[i].state ^= ((MAX_UNSIGNED) 1 << target);
+ }
+
+Where MAX_UNSIGNED/state is a 64-bit int. On a 32-bit platform it would be just
+so cool to turn it into something like:
+
+ long long Res = ((MAX_UNSIGNED) 1 << target);
+ if (target < 32) {
+ for(i=0; i<reg->size; i++)
+ reg->node[i].state ^= Res & 0xFFFFFFFFULL;
+ } else {
+ for(i=0; i<reg->size; i++)
+ reg->node[i].state ^= Res & 0xFFFFFFFF00000000ULL;
+ }
+
+... which would only do one 32-bit XOR per loop iteration instead of two.
+
+It would also be nice to recognize that reg->size doesn't alias reg->node[i], but
+alas...
+
+//===---------------------------------------------------------------------===//
+
+This isn't recognized as bswap by instcombine (yes, it really is bswap):
+
+unsigned long reverse(unsigned v) {
+ unsigned t;
+ t = v ^ ((v << 16) | (v >> 16));
+ t &= ~0xff0000;
+ v = (v << 24) | (v >> 8);
+ return v ^ (t >> 8);
+}
+
+//===---------------------------------------------------------------------===//
+
+These idioms should be recognized as popcount (see PR1488):
+
+unsigned countbits_slow(unsigned v) {
+ unsigned c;
+ for (c = 0; v; v >>= 1)
+ c += v & 1;
+ return c;
+}
+unsigned countbits_fast(unsigned v){
+ unsigned c;
+ for (c = 0; v; c++)
+ v &= v - 1; // clear the least significant bit set
+ return c;
+}
+
+typedef unsigned long long BITBOARD;
+int PopCnt(register BITBOARD a) {
+ register int c=0;
+ while(a) {
+ c++;
+ a &= a - 1;
+ }
+ return c;
+}
+unsigned int popcount(unsigned int input) {
+ unsigned int count = 0;
+ for (unsigned int i = 0; i < 4 * 8; i++)
+ count += (input >> i) & 1;
+ return count;
+}
+
+//===---------------------------------------------------------------------===//
+
+These should turn into single 16-bit (unaligned?) loads on little/big endian
+processors.
+
+unsigned short read_16_le(const unsigned char *adr) {
+ return adr[0] | (adr[1] << 8);
+}
+unsigned short read_16_be(const unsigned char *adr) {
+ return (adr[0] << 8) | adr[1];
+}
+
+//===---------------------------------------------------------------------===//
+
+instcombine should handle this transform:
+   icmp pred (sdiv X, C1), C2
+when X, C1, and C2 are unsigned. Similarly for udiv and signed operands.
+
+Currently InstCombine avoids this transform but will do it when the signs of
+the operands and the sign of the divide match. See the FIXME in
+InstructionCombining.cpp in the visitSetCondInst method after the switch case
+for Instruction::UDiv (around line 4447) for more details.
+
+The SingleSource/Benchmarks/Shootout-C++/hash and hash2 tests have examples of
+this construct.
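+
+An illustration of the transform with made-up constants (the general rule
+rewrites a compare of a division result against a constant as a range check
+on the dividend):
+
+int f_before(unsigned X) { return X / 10 == 7; }
+int f_after (unsigned X) { return X >= 70 && X <= 79; }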
+
+//===---------------------------------------------------------------------===//
+
+viterbi speeds up *significantly* if the various "history" related copy loops
+are turned into memcpy calls at the source level. We need a "loops to memcpy"
+pass.
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+
+typedef unsigned U32;
+typedef unsigned long long U64;
+int test (U32 *inst, U64 *regs) {
+ U64 effective_addr2;
+ U32 temp = *inst;
+ int r1 = (temp >> 20) & 0xf;
+ int b2 = (temp >> 16) & 0xf;
+ effective_addr2 = temp & 0xfff;
+ if (b2) effective_addr2 += regs[b2];
+ b2 = (temp >> 12) & 0xf;
+ if (b2) effective_addr2 += regs[b2];
+ effective_addr2 &= regs[4];
+ if ((effective_addr2 & 3) == 0)
+ return 1;
+ return 0;
+}
+
+Note that only the low 2 bits of effective_addr2 are used. On 32-bit systems,
+we don't eliminate the computation of the top half of effective_addr2 because
+we don't have whole-function selection dags. On x86, this means we use one
+extra register for the function when effective_addr2 is declared as U64 than
+when it is declared U32.
+
+//===---------------------------------------------------------------------===//
+
+Promote for i32 bswap can use i64 bswap + shr. Useful on targets with 64-bit
+regs and bswap, like itanium.
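+
+A sketch of the promoted form (__builtin_bswap64 is a GCC/Clang builtin):
+zero-extend, swap all eight bytes, then shift the interesting four back down.
+
+unsigned bswap32_via_bswap64(unsigned x) {
+  return (unsigned)(__builtin_bswap64((unsigned long long)x) >> 32);
+}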
+
+//===---------------------------------------------------------------------===//
+
+LSR should know what GPR types a target has. This code:
+
+volatile short X, Y; // globals
+
+void foo(int N) {
+ int i;
+ for (i = 0; i < N; i++) { X = i; Y = i*4; }
+}
+
+produces two identical IV's (after promotion) on PPC/ARM:
+
+LBB1_1: @bb.preheader
+ mov r3, #0
+ mov r2, r3
+ mov r1, r3
+LBB1_2: @bb
+ ldr r12, LCPI1_0
+ ldr r12, [r12]
+ strh r2, [r12]
+ ldr r12, LCPI1_1
+ ldr r12, [r12]
+ strh r3, [r12]
+ add r1, r1, #1 <- [0,+,1]
+ add r3, r3, #4
+ add r2, r2, #1 <- [0,+,1]
+ cmp r1, r0
+ bne LBB1_2 @bb
+
+
+//===---------------------------------------------------------------------===//
+
+Tail call elim should be more aggressive, checking to see if the call is
+followed by an uncond branch to an exit block.
+
+; This testcase is due to tail-duplication not wanting to copy the return
+; instruction into the terminating blocks because there was other code
+; optimized out of the function after the taildup happened.
+; RUN: llvm-as < %s | opt -tailcallelim | llvm-dis | not grep call
+
+define i32 @t4(i32 %a) {
+entry:
+ %tmp.1 = and i32 %a, 1 ; <i32> [#uses=1]
+ %tmp.2 = icmp ne i32 %tmp.1, 0 ; <i1> [#uses=1]
+ br i1 %tmp.2, label %then.0, label %else.0
+
+then.0: ; preds = %entry
+ %tmp.5 = add i32 %a, -1 ; <i32> [#uses=1]
+ %tmp.3 = call i32 @t4( i32 %tmp.5 ) ; <i32> [#uses=1]
+ br label %return
+
+else.0: ; preds = %entry
+ %tmp.7 = icmp ne i32 %a, 0 ; <i1> [#uses=1]
+ br i1 %tmp.7, label %then.1, label %return
+
+then.1: ; preds = %else.0
+ %tmp.11 = add i32 %a, -2 ; <i32> [#uses=1]
+ %tmp.9 = call i32 @t4( i32 %tmp.11 ) ; <i32> [#uses=1]
+ br label %return
+
+return: ; preds = %then.1, %else.0, %then.0
+ %result.0 = phi i32 [ 0, %else.0 ], [ %tmp.3, %then.0 ],
+ [ %tmp.9, %then.1 ]
+ ret i32 %result.0
+}
+
+//===---------------------------------------------------------------------===//
+
+Tail recursion elimination is not transforming this function, because it is
+returning n, which fails the isDynamicConstant check in the accumulator
+recursion checks.
+
+long long fib(const long long n) {
+ switch(n) {
+ case 0:
+ case 1:
+ return n;
+ default:
+ return fib(n-1) + fib(n-2);
+ }
+}
+
+//===---------------------------------------------------------------------===//
+
+Tail recursion elimination should handle:
+
+int pow2m1(int n) {
+ if (n == 0)
+ return 0;
+ return 2 * pow2m1 (n - 1) + 1;
+}
+
+Also, multiplies can be turned into SHL's, so they should be handled as if
+they were associative. "return foo() << 1" can be tail recursion eliminated.
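+
+A hand-written sketch of what eliminating the accumulator recursion in
+pow2m1 would give (not necessarily the exact form the pass would emit):
+
+int pow2m1_iter(int n) {
+  int acc = 0;
+  while (n != 0) {
+    acc = 2 * acc + 1;  /* fold one level of "2 * pow2m1(n-1) + 1" */
+    n--;
+  }
+  return acc;
+}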
+
+//===---------------------------------------------------------------------===//
+
+Argument promotion should promote arguments for recursive functions, like
+this:
+
+; RUN: llvm-as < %s | opt -argpromotion | llvm-dis | grep x.val
+
+define internal i32 @foo(i32* %x) {
+entry:
+ %tmp = load i32* %x ; <i32> [#uses=0]
+ %tmp.foo = call i32 @foo( i32* %x ) ; <i32> [#uses=1]
+ ret i32 %tmp.foo
+}
+
+define i32 @bar(i32* %x) {
+entry:
+ %tmp3 = call i32 @foo( i32* %x ) ; <i32> [#uses=1]
+ ret i32 %tmp3
+}
+
+//===---------------------------------------------------------------------===//
+
+"basicaa" should know how to look through "or" instructions that act like add
+instructions. For example in this code, the x*4+1 is turned into x*4 | 1, and
+basicaa can't analyze the array subscript, leading to duplicated loads in the
+generated code:
+
+void test(int X, int Y, int a[]) {
+int i;
+ for (i=2; i<1000; i+=4) {
+ a[i+0] = a[i-1+0]*a[i-2+0];
+ a[i+1] = a[i-1+1]*a[i-2+1];
+ a[i+2] = a[i-1+2]*a[i-2+2];
+ a[i+3] = a[i-1+3]*a[i-2+3];
+ }
+}
+
+BasicAA also doesn't do this for add. It needs to know that &A[i+1] != &A[i].
+
+//===---------------------------------------------------------------------===//
+
+We should investigate an instruction sinking pass. Consider this silly
+example in pic mode:
+
+#include <assert.h>
+void foo(int x) {
+ assert(x);
+ //...
+}
+
+we compile this to:
+_foo:
+ subl $28, %esp
+ call "L1$pb"
+"L1$pb":
+ popl %eax
+ cmpl $0, 32(%esp)
+ je LBB1_2 # cond_true
+LBB1_1: # return
+ # ...
+ addl $28, %esp
+ ret
+LBB1_2: # cond_true
+...
+
+The PIC base computation (call+popl) is only used on one path through the
+code, but is currently always computed in the entry block. It would be
+better to sink the picbase computation down into the block for the
+assertion, as it is the only one that uses it. This happens for a lot of
+code with early outs.
+
+Another example is loads of arguments, which are usually emitted into the
+entry block on targets like x86. If not used in all paths through a
+function, they should be sunk into the ones that do.
+
+In this case, whole-function-isel would also handle this.
+
+//===---------------------------------------------------------------------===//
+
+Investigate lowering of sparse switch statements into perfect hash tables:
+http://burtleburtle.net/bob/hash/perfect.html
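+
+A rough illustration of the idea (the case values, the hash v % 5, and the
+table contents are all made up; v % 5 just happens to be collision-free for
+this key set): a sparse switch over 3/17/96/255 becomes a hash into a small
+dense table plus one key check.
+
+int classify(unsigned v) {
+  static const unsigned keys[5]    = { 255, 96, 17, 3, 0 };
+  static const int      results[5] = {   3,  2,  1, 0, 0 };
+  unsigned h = v % 5;                     /* the "perfect hash" */
+  return keys[h] == v ? results[h] : -1;  /* -1 is the default case */
+}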
+
+//===---------------------------------------------------------------------===//
+
+We should turn things like "load+fabs+store" and "load+fneg+store" into the
+corresponding integer operations. On a yonah, this loop:
+
+double a[256];
+void foo() {
+ int i, b;
+ for (b = 0; b < 10000000; b++)
+ for (i = 0; i < 256; i++)
+ a[i] = -a[i];
+}
+
+is twice as slow as this loop:
+
+long long a[256];
+void foo() {
+ int i, b;
+ for (b = 0; b < 10000000; b++)
+ for (i = 0; i < 256; i++)
+ a[i] ^= (1ULL << 63);
+}
+
+and I suspect other processors are similar. On X86 in particular this is a
+big win because doing this with integers allows the use of read/modify/write
+instructions.
+
+//===---------------------------------------------------------------------===//
+
+DAG Combiner should try to combine small loads into larger loads when
+profitable. For example, we compile this C++ example:
+
+struct THotKey { short Key; bool Control; bool Shift; bool Alt; };
+extern THotKey m_HotKey;
+THotKey GetHotKey () { return m_HotKey; }
+
+into (-O3 -fno-exceptions -static -fomit-frame-pointer):
+
+__Z9GetHotKeyv:
+ pushl %esi
+ movl 8(%esp), %eax
+ movb _m_HotKey+3, %cl
+ movb _m_HotKey+4, %dl
+ movb _m_HotKey+2, %ch
+ movw _m_HotKey, %si
+ movw %si, (%eax)
+ movb %ch, 2(%eax)
+ movb %cl, 3(%eax)
+ movb %dl, 4(%eax)
+ popl %esi
+ ret $4
+
+GCC produces:
+
+__Z9GetHotKeyv:
+ movl _m_HotKey, %edx
+ movl 4(%esp), %eax
+ movl %edx, (%eax)
+ movzwl _m_HotKey+4, %edx
+ movw %dx, 4(%eax)
+ ret $4
+
+The LLVM IR contains the needed alignment info, so we should be able to
+merge the loads and stores into 4-byte loads:
+
+ %struct.THotKey = type { i16, i8, i8, i8 }
+define void @_Z9GetHotKeyv(%struct.THotKey* sret %agg.result) nounwind {
+...
+ %tmp2 = load i16* getelementptr (@m_HotKey, i32 0, i32 0), align 8
+ %tmp5 = load i8* getelementptr (@m_HotKey, i32 0, i32 1), align 2
+ %tmp8 = load i8* getelementptr (@m_HotKey, i32 0, i32 2), align 1
+ %tmp11 = load i8* getelementptr (@m_HotKey, i32 0, i32 3), align 2
+
+Alternatively, we should use a small amount of base-offset alias analysis
+to make it so the scheduler doesn't need to hold all the loads in regs at
+once.
+
+//===---------------------------------------------------------------------===//
+
+We should add an FRINT node to the DAG to model targets that have legal
+implementations of ceil/floor/rint.
+
+//===---------------------------------------------------------------------===//
+
+This GCC bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34043
+contains a testcase that compiles down to:
+
+ %struct.XMM128 = type { <4 x float> }
+..
+ %src = alloca %struct.XMM128
+..
+ %tmp6263 = bitcast %struct.XMM128* %src to <2 x i64>*
+ %tmp65 = getelementptr %struct.XMM128* %src, i32 0, i32 0
+ store <2 x i64> %tmp5899, <2 x i64>* %tmp6263, align 16
+ %tmp66 = load <4 x float>* %tmp65, align 16
+ %tmp71 = add <4 x float> %tmp66, %tmp66
+
+If the mid-level optimizer turned the bitcast of pointer + store of tmp5899
+into a bitcast of the vector value and a store to the pointer, then the
+store->load could be easily removed.
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+
+int test() {
+ long long input[8] = {1,1,1,1,1,1,1,1};
+ foo(input);
+}
+
+We currently compile this into a memcpy from a global array since the
+initializer is fairly large and not memset'able. This is good, but the memcpy
+gets lowered to load/stores in the code generator. This is also ok, except
+that the codegen lowering for memcpy doesn't handle the case when the source
+is a constant global. This gives us atrocious code like this:
+
+ call "L1$pb"
+"L1$pb":
+ popl %eax
+ movl _C.0.1444-"L1$pb"+32(%eax), %ecx
+ movl %ecx, 40(%esp)
+ movl _C.0.1444-"L1$pb"+20(%eax), %ecx
+ movl %ecx, 28(%esp)
+ movl _C.0.1444-"L1$pb"+36(%eax), %ecx
+ movl %ecx, 44(%esp)
+ movl _C.0.1444-"L1$pb"+44(%eax), %ecx
+ movl %ecx, 52(%esp)
+ movl _C.0.1444-"L1$pb"+40(%eax), %ecx
+ movl %ecx, 48(%esp)
+ movl _C.0.1444-"L1$pb"+12(%eax), %ecx
+ movl %ecx, 20(%esp)
+ movl _C.0.1444-"L1$pb"+4(%eax), %ecx
+...
+
+instead of:
+ movl $1, 16(%esp)
+ movl $0, 20(%esp)
+ movl $1, 24(%esp)
+ movl $0, 28(%esp)
+ movl $1, 32(%esp)
+ movl $0, 36(%esp)
+ ...
+
+//===---------------------------------------------------------------------===//
+
+http://llvm.org/PR717:
+
+The following code should compile into "ret int undef". Instead, LLVM
+produces "ret int 0":
+
+int f() {
+ int x = 4;
+ int y;
+ if (x == 3) y = 0;
+ return y;
+}
+
+//===---------------------------------------------------------------------===//
+
+The loop unroller should partially unroll loops (instead of peeling them)
+when code growth isn't too bad and when an unroll count allows simplification
+of some code within the loop. One trivial example is:
+
+#include <stdio.h>
+int main() {
+ int nRet = 17;
+ int nLoop;
+ for ( nLoop = 0; nLoop < 1000; nLoop++ ) {
+ if ( nLoop & 1 )
+ nRet += 2;
+ else
+ nRet -= 1;
+ }
+ return nRet;
+}
+
+Unrolling by 2 would eliminate the '&1' in both copies, leading to a net
+reduction in code size. The resultant code would then also be suitable for
+exit value computation.
+
+//===---------------------------------------------------------------------===//
+
+We miss a bunch of rotate opportunities on various targets, including ppc, x86,
+etc. On X86, we miss a bunch of 'rotate by variable' cases because the rotate
+matching code in dag combine doesn't look through truncates aggressively
+enough. Here are some testcases reduced from GCC PR17886:
+
+unsigned long long f(unsigned long long x, int y) {
+ return (x << y) | (x >> 64-y);
+}
+unsigned f2(unsigned x, int y){
+ return (x << y) | (x >> 32-y);
+}
+unsigned long long f3(unsigned long long x){
+ int y = 9;
+ return (x << y) | (x >> 64-y);
+}
+unsigned f4(unsigned x){
+ int y = 10;
+ return (x << y) | (x >> 32-y);
+}
+unsigned long long f5(unsigned long long x, unsigned long long y) {
+ return (x << 8) | ((y >> 48) & 0xffull);
+}
+unsigned long long f6(unsigned long long x, unsigned long long y, int z) {
+ switch(z) {
+ case 1:
+ return (x << 8) | ((y >> 48) & 0xffull);
+ case 2:
+ return (x << 16) | ((y >> 40) & 0xffffull);
+ case 3:
+ return (x << 24) | ((y >> 32) & 0xffffffull);
+ case 4:
+ return (x << 32) | ((y >> 24) & 0xffffffffull);
+ default:
+ return (x << 40) | ((y >> 16) & 0xffffffffffull);
+ }
+}
+
+On X86-64, we only handle f2/f3/f4 right. On x86-32, a few of these
+generate truly horrible code, instead of using shld and friends. On
+ARM, we end up with calls to L___lshrdi3/L___ashldi3 in f, which is
+badness. PPC64 misses f, f5 and f6. CellSPU aborts in isel.
+
+//===---------------------------------------------------------------------===//
+
+We do a number of simplifications in simplify libcalls to strength reduce
+standard library functions, but we don't currently merge them together. For
+example, it is useful to merge memcpy(a,b,strlen(b)) -> strcpy. This can only
+be done safely if "b" isn't modified between the strlen and memcpy of course.
+
+//===---------------------------------------------------------------------===//
+
+Reassociate should turn things like:
+
+int factorial(int X) {
+ return X*X*X*X*X*X*X*X;
+}
+
+into llvm.powi calls, allowing the code generator to produce balanced
+multiplication trees.
+
+//===---------------------------------------------------------------------===//
+
+We generate a horrible libcall for llvm.powi. For example, we compile:
+
+#include <cmath>
+double f(double a) { return std::pow(a, 4); }
+
+into:
+
+__Z1fd:
+ subl $12, %esp
+ movsd 16(%esp), %xmm0
+ movsd %xmm0, (%esp)
+ movl $4, 8(%esp)
+ call L___powidf2$stub
+ addl $12, %esp
+ ret
+
+GCC produces:
+
+__Z1fd:
+ subl $12, %esp
+ movsd 16(%esp), %xmm0
+ mulsd %xmm0, %xmm0
+ mulsd %xmm0, %xmm0
+ movsd %xmm0, (%esp)
+ fldl (%esp)
+ addl $12, %esp
+ ret
+
+//===---------------------------------------------------------------------===//
+
+We compile this program: (from GCC PR11680)
+http://gcc.gnu.org/bugzilla/attachment.cgi?id=4487
+
+Into code that runs the same speed in fast/slow modes, but both modes run 2x
+slower than when compiled with GCC (either 4.0 or 4.2):
+
+$ llvm-g++ perf.cpp -O3 -fno-exceptions
+$ time ./a.out fast
+1.821u 0.003s 0:01.82 100.0% 0+0k 0+0io 0pf+0w
+
+$ g++ perf.cpp -O3 -fno-exceptions
+$ time ./a.out fast
+0.821u 0.001s 0:00.82 100.0% 0+0k 0+0io 0pf+0w
+
+It looks like we are making the same inlining decisions, so this may be raw
+codegen badness or something else (haven't investigated).
+
+//===---------------------------------------------------------------------===//
+
+We miss some instcombines for stuff like this:
+void bar (void);
+void foo (unsigned int a) {
+ /* This one is equivalent to a >= (3 << 2). */
+ if ((a >> 2) >= 3)
+ bar ();
+}
+
+A few other related ones are in GCC PR14753.
+
+//===---------------------------------------------------------------------===//
+
+Divisibility by constant can be simplified (according to GCC PR12849) from
+being a mulhi to being a mul lo (cheaper). Testcase:
+
+void bar(unsigned n) {
+ if (n % 3 == 0)
+ true();
+}
+
+I think this basically amounts to a dag combine to simplify comparisons against
+multiply hi's into a comparison against the mullo.
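+
+A sketch of the mullo form for divisor 3 (the constants are specific to
+32-bit unsigned and to 3; 0xAAAAAAAB is the multiplicative inverse of 3
+mod 2^32 and 0x55555555 is UINT_MAX / 3):
+
+int divisible_by_3(unsigned n) {
+  return n * 0xAAAAAAABu <= 0x55555555u;
+}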
+
+//===---------------------------------------------------------------------===//
+
+Better mod/ref analysis for scanf would allow us to eliminate the vtable and a
+bunch of other stuff from this example (see PR1604):
+
+#include <cstdio>
+struct test {
+ int val;
+ virtual ~test() {}
+};
+
+int main() {
+ test t;
+ std::scanf("%d", &t.val);
+ std::printf("%d\n", t.val);
+}
+
+//===---------------------------------------------------------------------===//
+
+Instcombine will merge comparisons like (x >= 10) && (x < 20) by producing (x -
+10) u< 10, but only when the comparisons have matching sign.
+
+This could be converted with a similar technique. (PR1941)
+
+define i1 @test(i8 %x) {
+ %A = icmp uge i8 %x, 5
+ %B = icmp slt i8 %x, 20
+ %C = and i1 %A, %B
+ ret i1 %C
+}
+
+//===---------------------------------------------------------------------===//
+
+These functions perform the same computation, but produce different assembly.
+
+define i8 @select(i8 %x) readnone nounwind {
+ %A = icmp ult i8 %x, 250
+ %B = select i1 %A, i8 0, i8 1
+ ret i8 %B
+}
+
+define i8 @addshr(i8 %x) readnone nounwind {
+ %A = zext i8 %x to i9
+ %B = add i9 %A, 6 ;; 256 - 250 == 6
+ %C = lshr i9 %B, 8
+ %D = trunc i9 %C to i8
+ ret i8 %D
+}
+
+//===---------------------------------------------------------------------===//
+
+From gcc bug 24696:
+int
+f (unsigned long a, unsigned long b, unsigned long c)
+{
+ return ((a & (c - 1)) != 0) || ((b & (c - 1)) != 0);
+}
+int
+f (unsigned long a, unsigned long b, unsigned long c)
+{
+ return ((a & (c - 1)) != 0) | ((b & (c - 1)) != 0);
+}
+Both should combine to ((a|b) & (c-1)) != 0. Currently not optimized with
+"clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 20192:
+#define PMD_MASK (~((1UL << 23) - 1))
+void clear_pmd_range(unsigned long start, unsigned long end)
+{
+ if (!(start & ~PMD_MASK) && !(end & ~PMD_MASK))
+ f();
+}
+The expression should optimize to something like
+"!((start|end)&~PMD_MASK). Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 15241:
+unsigned int
+foo (unsigned int a, unsigned int b)
+{
+ if (a <= 7 && b <= 7)
+ baz ();
+}
+Should combine to "(a|b) <= 7". Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 3756:
+int
+pn (int n)
+{
+ return (n >= 0 ? 1 : -1);
+}
+Should combine to (n >> 31) | 1. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts | llc".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 28685:
+int test(int a, int b)
+{
+ int lt = a < b;
+ int eq = a == b;
+
+ return (lt || eq);
+}
+Should combine to "a <= b". Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts | llc".
+
+//===---------------------------------------------------------------------===//
+
+void a(int variable)
+{
+ if (variable == 4 || variable == 6)
+ bar();
+}
+This should optimize to "if ((variable | 2) == 6)". Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts | llc".
+
+//===---------------------------------------------------------------------===//
+
+unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return i;}
+unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;}
+These should combine to the same thing. Currently, the first function
+produces better code on X86.
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 15784:
+#define abs(x) x>0?x:-x
+int f(int x, int y)
+{
+ return (abs(x)) >= 0;
+}
+This should optimize to x != INT_MIN. (With -fwrapv.) Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 14753:
+void
+rotate_cst (unsigned int a)
+{
+ a = (a << 10) | (a >> 22);
+ if (a == 123)
+ bar ();
+}
+void
+minus_cst (unsigned int a)
+{
+ unsigned int tem;
+
+ tem = 20 - a;
+ if (tem == 5)
+ bar ();
+}
+void
+mask_gt (unsigned int a)
+{
+ /* This is equivalent to a > 15. */
+ if ((a & ~7) > 8)
+ bar ();
+}
+void
+rshift_gt (unsigned int a)
+{
+ /* This is equivalent to a > 23. */
+ if ((a >> 2) > 5)
+ bar ();
+}
+All should simplify to a single comparison. All of these are
+currently not optimized with "clang -emit-llvm-bc | opt
+-std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+From GCC Bug 32605:
+int c(int* x) {return (char*)x+2 == (char*)x;}
+Should combine to 0. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts" (although llc can optimize it).
+
+//===---------------------------------------------------------------------===//
+
+int a(unsigned char* b) {return *b > 99;}
+There's an unnecessary zext in the generated code with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;}
+Should be combined to "((b >> 1) | b) & 1". Currently not optimized
+with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned a(unsigned x, unsigned y) { return x | (y & 1) | (y & 2);}
+Should combine to "x | (y & 3)". Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned a(unsigned a) {return ((a | 1) & 3) | (a & -4);}
+Should combine to "a | 1". Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);}
+Should fold to "(~a & c) | (a & b)". Currently not optimized with
+"clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int a,int b) {return (~(a|b))|a;}
+Should fold to "a|~b". Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int a, int b) {return (a&&b) || (a&&!b);}
+Should fold to "a". Currently not optimized with "clang -emit-llvm-bc
+| opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int a, int b, int c) {return (a&&b) || (!a&&c);}
+Should fold to "a ? b : c", or at least something sane. Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int a, int b, int c) {return (a&&b) || (a&&c) || (a&&b&&c);}
+Should fold to a && (b || c). Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return x | ((x & 8) ^ 8);}
+Should combine to x | 8. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return x ^ ((x & 8) ^ 8);}
+Should also combine to x | 8. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return (x & 8) == 0 ? -1 : -9;}
+Should combine to (x | -9) ^ 8. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return (x & 8) == 0 ? -9 : -1;}
+Should combine to x | -9. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int a(int x) {return ((x | -9) ^ 8) & x;}
+Should combine to x & -9. Currently not optimized with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned a(unsigned a) {return a * 0x11111111 >> 28 & 1;}
+Should combine to "a * 0x88888888 >> 31". Currently not optimized
+with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned a(char* x) {if ((*x & 32) == 0) return b();}
+There's an unnecessary zext in the generated code with "clang
+-emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned a(unsigned long long x) {return 40 * (x >> 1);}
+Should combine to "20 * (((unsigned)x) & -2)". Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+We would like to do the following transform in the instcombiner:
+
+ -X/C -> X/-C
+
+However, this isn't valid if (-X) overflows. We can implement this when we
+have the concept of a "C signed subtraction" operator that is undefined on
+on overflow.
+
+//===---------------------------------------------------------------------===//
+
+This was noticed in the entryblock for grokdeclarator in 403.gcc:
+
+ %tmp = icmp eq i32 %decl_context, 4
+ %decl_context_addr.0 = select i1 %tmp, i32 3, i32 %decl_context
+ %tmp1 = icmp eq i32 %decl_context_addr.0, 1
+ %decl_context_addr.1 = select i1 %tmp1, i32 0, i32 %decl_context_addr.0
+
+tmp1 should be simplified to something like:
+ (!tmp || decl_context == 1)
+
+This allows recursive simplifications, tmp1 is used all over the place in
+the function, e.g. by:
+
+ %tmp23 = icmp eq i32 %decl_context_addr.1, 0 ; <i1> [#uses=1]
+ %tmp24 = xor i1 %tmp1, true ; <i1> [#uses=1]
+ %or.cond8 = and i1 %tmp23, %tmp24 ; <i1> [#uses=1]
+
+later.
+
+//===---------------------------------------------------------------------===//
+
+Store sinking: This code:
+
+void f (int n, int *cond, int *res) {
+ int i;
+ *res = 0;
+ for (i = 0; i < n; i++)
+ if (*cond)
+ *res ^= 234; /* (*) */
+}
+
+On this function GVN hoists the fully redundant value of *res, but nothing
+moves the store out. This gives us this code:
+
+bb: ; preds = %bb2, %entry
+ %.rle = phi i32 [ 0, %entry ], [ %.rle6, %bb2 ]
+ %i.05 = phi i32 [ 0, %entry ], [ %indvar.next, %bb2 ]
+ %1 = load i32* %cond, align 4
+ %2 = icmp eq i32 %1, 0
+ br i1 %2, label %bb2, label %bb1
+
+bb1: ; preds = %bb
+ %3 = xor i32 %.rle, 234
+ store i32 %3, i32* %res, align 4
+ br label %bb2
+
+bb2: ; preds = %bb, %bb1
+ %.rle6 = phi i32 [ %3, %bb1 ], [ %.rle, %bb ]
+ %indvar.next = add i32 %i.05, 1
+ %exitcond = icmp eq i32 %indvar.next, %n
+ br i1 %exitcond, label %return, label %bb
+
+DSE should sink partially dead stores to get the store out of the loop.
+
+Here's another partial dead case:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12395
+
+//===---------------------------------------------------------------------===//
+
+Scalar PRE hoists the mul in the common block up to the else:
+
+int test (int a, int b, int c, int g) {
+ int d, e;
+ if (a)
+ d = b * c;
+ else
+ d = b - c;
+ e = b * c + g;
+ return d + e;
+}
+
+It would be better to do the mul once to reduce codesize above the if.
+This is GCC PR38204.
+
+//===---------------------------------------------------------------------===//
+
+GCC PR37810 is an interesting case where we should sink load/store reload
+into the if block and outside the loop, so we don't reload/store it on the
+non-call path.
+
+for () {
+  *P += 1;
+  if ()
+    call();
+  else
+    ...
+}
+->
+tmp = *P
+for () {
+  tmp += 1;
+  if () {
+    *P = tmp;
+    call();
+    tmp = *P;
+  } else ...
+}
+*P = tmp;
+
+We now hoist the reload after the call (Transforms/GVN/lpre-call-wrap.ll), but
+we don't sink the store. We need partially dead store sinking.
+
+//===---------------------------------------------------------------------===//
+
+[PHI TRANSLATE GEPs]
+
+GCC PR37166: Sinking of loads prevents SROA'ing the "g" struct on the stack
+leading to excess stack traffic. This could be handled by GVN with some crazy
+symbolic phi translation. The code we get looks like (g is on the stack):
+
+bb2: ; preds = %bb1
+..
+ %9 = getelementptr %struct.f* %g, i32 0, i32 0
+ store i32 %8, i32* %9, align 4
+ br label %bb3
+
+bb3: ; preds = %bb1, %bb2, %bb
+ %c_addr.0 = phi %struct.f* [ %g, %bb2 ], [ %c, %bb ], [ %c, %bb1 ]
+ %b_addr.0 = phi %struct.f* [ %b, %bb2 ], [ %g, %bb ], [ %b, %bb1 ]
+ %10 = getelementptr %struct.f* %c_addr.0, i32 0, i32 0
+ %11 = load i32* %10, align 4
+
+%11 is fully redundant; in BB2 it should have the value %8.
+
+GCC PR33344 is a similar case.
+
+//===---------------------------------------------------------------------===//
+
+There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
+GCC testsuite, as well as many PRE testcases named ssa-pre-*.c.
+
+//===---------------------------------------------------------------------===//
+
+There are some interesting cases in testsuite/gcc.dg/tree-ssa/pred-comm* in the
+GCC testsuite. For example, predcom-1.c is:
+
+ for (i = 2; i < 1000; i++)
+ fib[i] = (fib[i-1] + fib[i - 2]) & 0xffff;
+
+which compiles into:
+
+bb1: ; preds = %bb1, %bb1.thread
+ %indvar = phi i32 [ 0, %bb1.thread ], [ %0, %bb1 ]
+ %i.0.reg2mem.0 = add i32 %indvar, 2
+ %0 = add i32 %indvar, 1 ; <i32> [#uses=3]
+ %1 = getelementptr [1000 x i32]* @fib, i32 0, i32 %0
+ %2 = load i32* %1, align 4 ; <i32> [#uses=1]
+ %3 = getelementptr [1000 x i32]* @fib, i32 0, i32 %indvar
+ %4 = load i32* %3, align 4 ; <i32> [#uses=1]
+ %5 = add i32 %4, %2 ; <i32> [#uses=1]
+ %6 = and i32 %5, 65535 ; <i32> [#uses=1]
+ %7 = getelementptr [1000 x i32]* @fib, i32 0, i32 %i.0.reg2mem.0
+ store i32 %6, i32* %7, align 4
+ %exitcond = icmp eq i32 %0, 998 ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb1
+
+This is basically:
+ LOAD fib[i+1]
+ LOAD fib[i]
+ STORE fib[i+2]
+
+instead of handling this as a loop or other xform, all we'd need to do is teach
+load PRE to phi translate the %0 add (i+1) into the predecessor as (i'+1+1) =
+(i'+2) (where i' is the previous iteration of i). This would find the store
+which feeds it.
+
+predcom-2.c is apparently the same as predcom-1.c.
+predcom-3.c is very similar but needs loads feeding each other instead of
+store->load.
+predcom-4.c seems the same as the rest.
+
+
+//===---------------------------------------------------------------------===//
+
+Other simple load PRE cases:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35287 [LPRE crit edge splitting]
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34677 (licm does this, LPRE crit edge)
+ llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -simplifycfg -gvn | llvm-dis
+
+//===---------------------------------------------------------------------===//
+
+Type based alias analysis:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14705
+
+//===---------------------------------------------------------------------===//
+
+When GVN/PRE finds a store of a float to a pointer that must-aliases a
+pointer loaded as an int, it should turn the forwarded value into a bitcast.
+This is a nice generalization of the SROA hack that would apply to other
+cases, e.g.:
+
+int foo(int C, int *P, float X) {
+ if (C) {
+ bar();
+ *P = 42;
+ } else
+ *(float*)P = X;
+
+ return *P;
+}
+
+
+One example (that requires crazy phi translation) is:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS]
+
+//===---------------------------------------------------------------------===//
+
+A/B get pinned to the stack because we turn an if/then into a select instead
+of PRE'ing the load/store. This may be fixable in instcombine:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37892
+
+
+
+Interesting missed case because of control flow flattening (should be 2 loads):
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629
+With: llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as |
+ opt -mem2reg -gvn -instcombine | llvm-dis
+we miss it because we need 1) GEP PHI TRAN, 2) CRIT EDGE 3) MULTIPLE DIFFERENT
+VALS PRODUCED BY ONE BLOCK OVER DIFFERENT PATHS
+
+//===---------------------------------------------------------------------===//
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19633
+We could eliminate the branch condition here, loading from null is undefined:
+
+struct S { int w, x, y, z; };
+struct T { int r; struct S s; };
+void bar (struct S, int);
+void foo (int a, struct T b)
+{
+ struct S *c = 0;
+ if (a)
+ c = &b.s;
+ bar (*c, a);
+}
+
+//===---------------------------------------------------------------------===//
+
+simplifylibcalls should do several optimizations for strspn/strcspn:
+
+strcspn(x, "") -> strlen(x)
+strcspn("", x) -> 0
+strspn("", x) -> 0
+strspn(x, "") -> strlen(x)
+strspn(x, "a") -> strchr(x, 'a')-x
+
+strcspn(x, "a") -> inlined loop for up to 3 letters (similarly for strspn):
+
+size_t __strcspn_c3 (__const char *__s, int __reject1, int __reject2,
+ int __reject3) {
+ register size_t __result = 0;
+ while (__s[__result] != '\0' && __s[__result] != __reject1 &&
+ __s[__result] != __reject2 && __s[__result] != __reject3)
+ ++__result;
+ return __result;
+}
+
+This should turn into a switch on the character. See PR3253 for some notes on
+codegen.
+
+456.hmmer apparently uses strcspn and strspn a lot. 471.omnetpp uses strspn.
+
+//===---------------------------------------------------------------------===//
+
+"gas" uses this idiom:
+ else if (strchr ("+-/*%|&^:[]()~", *intel_parser.op_string))
+..
+ else if (strchr ("<>", *intel_parser.op_string)
+
+Those should be turned into a switch.
+
+//===---------------------------------------------------------------------===//
+
+252.eon contains this interesting code:
+
+ %3072 = getelementptr [100 x i8]* %tempString, i32 0, i32 0
+ %3073 = call i8* @strcpy(i8* %3072, i8* %3071) nounwind
+ %strlen = call i32 @strlen(i8* %3072) ; uses = 1
+ %endptr = getelementptr [100 x i8]* %tempString, i32 0, i32 %strlen
+ call void @llvm.memcpy.i32(i8* %endptr,
+ i8* getelementptr ([5 x i8]* @"\01LC42", i32 0, i32 0), i32 5, i32 1)
+ %3074 = call i32 @strlen(i8* %endptr) nounwind readonly
+
+This is interesting for a couple reasons. First, in this:
+
+ %3073 = call i8* @strcpy(i8* %3072, i8* %3071) nounwind
+ %strlen = call i32 @strlen(i8* %3072)
+
+The strlen could be replaced with: %strlen = sub %3072, %3073, because the
+strcpy call returns a pointer to the end of the string. Based on that, the
+endptr GEP just becomes equal to 3073, which eliminates a strlen call and GEP.
+
+Second, the memcpy+strlen strlen can be replaced with:
+
+ %3074 = call i32 @strlen([5 x i8]* @"\01LC42") nounwind readonly
+
+Because the destination was just copied into the specified memory buffer. This,
+in turn, can be constant folded to "4".
+
+In other code, it contains:
+
+ %endptr6978 = bitcast i8* %endptr69 to i32*
+ store i32 7107374, i32* %endptr6978, align 1
+ %3167 = call i32 @strlen(i8* %endptr69) nounwind readonly
+
+Which could also be constant folded. Whatever is producing this should probably
+be fixed to leave this as a memcpy from a string.
+
+Further, eon also has an interesting partially redundant strlen call:
+
+bb8: ; preds = %_ZN18eonImageCalculatorC1Ev.exit
+ %682 = getelementptr i8** %argv, i32 6 ; <i8**> [#uses=2]
+ %683 = load i8** %682, align 4 ; <i8*> [#uses=4]
+ %684 = load i8* %683, align 1 ; <i8> [#uses=1]
+ %685 = icmp eq i8 %684, 0 ; <i1> [#uses=1]
+ br i1 %685, label %bb10, label %bb9
+
+bb9: ; preds = %bb8
+ %686 = call i32 @strlen(i8* %683) nounwind readonly
+ %687 = icmp ugt i32 %686, 254 ; <i1> [#uses=1]
+ br i1 %687, label %bb10, label %bb11
+
+bb10: ; preds = %bb9, %bb8
+ %688 = call i32 @strlen(i8* %683) nounwind readonly
+
+This could be eliminated by doing the strlen once in bb8, saving code size and
+improving perf on the bb8->9->10 path.
+
+//===---------------------------------------------------------------------===//
+
+I see an interesting fully redundant call to strlen left in 186.crafty:InputMove
+which looks like:
+ %movetext11 = getelementptr [128 x i8]* %movetext, i32 0, i32 0
+
+
+bb62: ; preds = %bb55, %bb53
+ %promote.0 = phi i32 [ %169, %bb55 ], [ 0, %bb53 ]
+ %171 = call i32 @strlen(i8* %movetext11) nounwind readonly align 1
+ %172 = add i32 %171, -1 ; <i32> [#uses=1]
+ %173 = getelementptr [128 x i8]* %movetext, i32 0, i32 %172
+
+... no stores ...
+ br i1 %or.cond, label %bb65, label %bb72
+
+bb65: ; preds = %bb62
+ store i8 0, i8* %173, align 1
+ br label %bb72
+
+bb72: ; preds = %bb65, %bb62
+ %trank.1 = phi i32 [ %176, %bb65 ], [ -1, %bb62 ]
+ %177 = call i32 @strlen(i8* %movetext11) nounwind readonly align 1
+
+Note that on the bb62->bb72 path, the %177 strlen call is partially
+redundant with the %171 call. At worst, we could shove the %177 strlen call
+up into the bb65 block moving it out of the bb62->bb72 path. However, note
+that bb65 stores to the string, zeroing out the last byte. This means that on
+that path the value of %177 is actually just %171-1. A sub is cheaper than a
+strlen!
+
+This pattern repeats several times, basically doing:
+
+ A = strlen(P);
+ P[A-1] = 0;
+ B = strlen(P);
+ where it is "obvious" that B = A-1.
+
+//===---------------------------------------------------------------------===//
+
+186.crafty contains this interesting pattern:
+
+%77 = call i8* @strstr(i8* getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0),
+ i8* %30)
+%phitmp648 = icmp eq i8* %77, getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0)
+br i1 %phitmp648, label %bb70, label %bb76
+
+bb70: ; preds = %OptionMatch.exit91, %bb69
+ %78 = call i32 @strlen(i8* %30) nounwind readonly align 1 ; <i32> [#uses=1]
+
+This is basically:
+ cststr = "abcdef";
+ if (strstr(cststr, P) == cststr) {
+ x = strlen(P);
+ ...
+
+The strstr call would be significantly cheaper written as:
+
+cststr = "abcdef";
+if (!memcmp(cststr, P, strlen(P)))
+  x = strlen(P);
+
+This is memcmp+strlen instead of strstr. This also makes the strlen fully
+redundant.
+
+//===---------------------------------------------------------------------===//
+
+186.crafty also contains this code:
+
+%1906 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0))
+%1907 = getelementptr [32 x i8]* @pgn_event, i32 0, i32 %1906
+%1908 = call i8* @strcpy(i8* %1907, i8* %1905) nounwind align 1
+%1909 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0))
+%1910 = getelementptr [32 x i8]* @pgn_event, i32 0, i32 %1909
+
+The last strlen is computable as 1908-@pgn_event, which means 1910=1908.
+
+//===---------------------------------------------------------------------===//
+
+186.crafty has this interesting pattern with the "out.4543" variable:
+
+call void @llvm.memcpy.i32(
+ i8* getelementptr ([10 x i8]* @out.4543, i32 0, i32 0),
+ i8* getelementptr ([7 x i8]* @"\01LC28700", i32 0, i32 0), i32 7, i32 1)
+%101 = call @printf(i8* ... @out.4543, i32 0, i32 0)) nounwind
+
+It is basically doing:
+
+ memcpy(globalarray, "string");
+ printf(..., globalarray);
+
+Anyway, since printf just reads the memory, forward substituting the string
+directly into the printf call eliminates the reads from globalarray.
+Since this pattern occurs frequently in crafty (due to the "DisplayTime" and
+other similar functions) there are many stores to "out". Once all the printfs
+stop using "out", all that is left is the memcpy's into it. This should allow
+globalopt to remove the "stored only" global.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+define inreg i32 @foo(i8* inreg %p) nounwind {
+ %tmp0 = load i8* %p
+ %tmp1 = ashr i8 %tmp0, 5
+ %tmp2 = sext i8 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+could be dagcombine'd to a sign-extending load with a shift.
+For example, on x86 this currently gets this:
+
+ movb (%eax), %al
+ sarb $5, %al
+ movsbl %al, %eax
+
+while it could get this:
+
+ movsbl (%eax), %eax
+ sarl $5, %eax
+
+//===---------------------------------------------------------------------===//
+
+GCC PR31029:
+
+int test(int x) { return 1-x == x; } // --> return false
+int test2(int x) { return 2-x == x; } // --> return x == 1 ?
+
+Always foldable for odd constants; what is the rule for even ones?
+
+//===---------------------------------------------------------------------===//
+
+PR 3381: GEP to field of size 0 inside a struct could be turned into GEP
+for next field in struct (which is at same address).
+
+For example: store of float into { {{}}, float } could be turned into a store to
+the float directly.
+
+//===---------------------------------------------------------------------===//
+
+#include <math.h>
+double foo(double a) { return sin(a); }
+
+This compiles into this on x86-64 Linux:
+foo:
+ subq $8, %rsp
+ call sin
+ addq $8, %rsp
+ ret
+vs:
+
+foo:
+ jmp sin
+
+//===---------------------------------------------------------------------===//
+
+The arg promotion pass should make use of nocapture to make its alias analysis
+stuff much more precise.
+
+//===---------------------------------------------------------------------===//
+
+The following functions should be optimized to use a select instead of a
+branch (from gcc PR40072):
+
+char char_int(int m) {if(m>7) return 0; return m;}
+int int_char(char m) {if(m>7) return 0; return m;}
+
+//===---------------------------------------------------------------------===//
+
+Instcombine should replace the load with a constant in:
+
+ static const char x[4] = {'a', 'b', 'c', 'd'};
+
+ unsigned int y(void) {
+ return *(unsigned int *)x;
+ }
+
+It currently only does this transformation when the size of the constant
+is the same as the size of the integer (so, try x[5]) and the last byte
+is a null (making it a C string). There's no need for these restrictions.
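+
+A hedged sketch of the folded result on a little-endian target (illustrative,
+not what instcombine produces today):
+
+  unsigned int y(void) {
+    return 0x64636261;  /* the bytes 'a','b','c','d' read as one 32-bit int */
+  }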
+
+//===---------------------------------------------------------------------===//
+
+InstCombine's "turn load from constant into constant" optimization should be
+more aggressive in the presence of bitcasts. For example, because of unions,
+this code:
+
+union vec2d {
+ double e[2];
+ double v __attribute__((vector_size(16)));
+};
+typedef union vec2d vec2d;
+
+static vec2d a={{1,2}}, b={{3,4}};
+
+vec2d foo () {
+ return (vec2d){ .v = a.v + b.v * (vec2d){{5,5}}.v };
+}
+
+Compiles into:
+
+@a = internal constant %0 { [2 x double]
+ [double 1.000000e+00, double 2.000000e+00] }, align 16
+@b = internal constant %0 { [2 x double]
+ [double 3.000000e+00, double 4.000000e+00] }, align 16
+...
+define void @foo(%struct.vec2d* noalias nocapture sret %agg.result) nounwind {
+entry:
+ %0 = load <2 x double>* getelementptr (%struct.vec2d*
+ bitcast (%0* @a to %struct.vec2d*), i32 0, i32 0), align 16
+ %1 = load <2 x double>* getelementptr (%struct.vec2d*
+ bitcast (%0* @b to %struct.vec2d*), i32 0, i32 0), align 16
+
+Instcombine should be able to optimize away the loads (and thus the globals).
+The whole body would then fold to storing the constant vector
+<double 1.6e+01, double 2.2e+01> (i.e. <1,2> + <3,4> * <5,5>) into the sret
+result.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..394b4cd40e76
--- /dev/null
+++ b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,9 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_partially_linked_object(LLVMSparcAsmPrinter
+ SparcAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMSparcCodeGen n)
+
+add_dependencies(LLVMSparcAsmPrinter ${n})
diff --git a/lib/Target/Sparc/AsmPrinter/Makefile b/lib/Target/Sparc/AsmPrinter/Makefile
new file mode 100644
index 000000000000..f12a6ac39891
--- /dev/null
+++ b/lib/Target/Sparc/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Sparc/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSparcAsmPrinter
+
+# Hack: we need to include 'main' Sparc target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
new file mode 100644
index 000000000000..61707f5556fb
--- /dev/null
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -0,0 +1,355 @@
+//===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format SPARC assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Sparc.h"
+#include "SparcInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include <cctype>
+#include <cstring>
+#include <map>
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+namespace {
+ class VISIBILITY_HIDDEN SparcAsmPrinter : public AsmPrinter {
+ /// We name each basic block in a Function with a unique number, so
+ /// that we can consistently refer to them later. This is cleared
+ /// at the beginning of each call to runOnMachineFunction().
+ ///
+ typedef std::map<const Value *, unsigned> ValueMapTy;
+ ValueMapTy NumberForBB;
+ public:
+ explicit SparcAsmPrinter(raw_ostream &O, TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V) {}
+
+ virtual const char *getPassName() const {
+ return "Sparc Assembly Printer";
+ }
+
+ void printModuleLevelGV(const GlobalVariable* GVar);
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printMemOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printCCOperand(const MachineInstr *MI, int opNum);
+
+ bool printInstruction(const MachineInstr *MI); // autogenerated.
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ };
+} // end of anonymous namespace
+
+#include "SparcGenAsmWriter.inc"
+
+/// createSparcCodePrinterPass - Returns a pass that prints the SPARC
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
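+/// A hedged usage sketch (how a pass-manager setup might instantiate it; the
+/// surrounding names are illustrative):
+///   PM.add(createSparcCodePrinterPass(Out, TM, OptLevel, Verbose));
+///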
+FunctionPass *llvm::createSparcCodePrinterPass(raw_ostream &o,
+ TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+/// runOnMachineFunction - This uses the printInstruction()
+/// method to print assembly for each instruction.
+///
+bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // BBNumber is used here so that a given Printer will never give two
+ // BBs the same name. (If you have a better way, please let me know!)
+ static unsigned BBNumber = 0;
+
+ O << "\n\n";
+
+ // Print out the label for the function.
+ const Function *F = MF.getFunction();
+ SwitchToSection(TAI->SectionForGlobal(F));
+ EmitAlignment(4, F);
+ O << "\t.globl\t" << CurrentFnName << '\n';
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ O << "\t.type\t" << CurrentFnName << ", #function\n";
+ O << CurrentFnName << ":\n";
+
+ // Number each basic block so that we can consistently refer to them
+ // in PC-relative references.
+ // FIXME: Why not use the MBB numbers?
+ NumberForBB.clear();
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ NumberForBB[I->getBasicBlock()] = BBNumber++;
+ }
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printInstruction(II);
+ ++EmittedInsts;
+ }
+ }
+
+ // We didn't modify anything.
+ return false;
+}
+
+void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand (opNum);
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ bool CloseParen = false;
+ if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) {
+ O << "%hi(";
+ CloseParen = true;
+ } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) &&
+ !MO.isReg() && !MO.isImm()) {
+ O << "%lo(";
+ CloseParen = true;
+ }
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ O << "%" << LowercaseString (RI.get(MO.getReg()).AsmName);
+ else
+ O << "%reg" << MO.getReg();
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (int)MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ {
+ const GlobalValue *GV = MO.getGlobal();
+ O << Mang->getValueName(GV);
+ }
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ << MO.getIndex();
+ break;
+ default:
+ O << "<unknown operand type>"; abort (); break;
+ }
+ if (CloseParen) O << ")";
+}
+
+void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier) {
+ printOperand(MI, opNum);
+
+ // If this is an ADD operand, emit it like normal operands.
+ if (Modifier && !strcmp(Modifier, "arith")) {
+ O << ", ";
+ printOperand(MI, opNum+1);
+ return;
+ }
+
+ if (MI->getOperand(opNum+1).isReg() &&
+ MI->getOperand(opNum+1).getReg() == SP::G0)
+ return; // don't print "+%g0"
+ if (MI->getOperand(opNum+1).isImm() &&
+ MI->getOperand(opNum+1).getImm() == 0)
+ return; // don't print "+0"
+
+ O << "+";
+ if (MI->getOperand(opNum+1).isGlobal() ||
+ MI->getOperand(opNum+1).isCPI()) {
+ O << "%lo(";
+ printOperand(MI, opNum+1);
+ O << ")";
+ } else {
+ printOperand(MI, opNum+1);
+ }
+}
+
+void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
+ int CC = (int)MI->getOperand(opNum).getImm();
+ O << SPARCCondCodeToString((SPCC::CondCodes)CC);
+}
+
+bool SparcAsmPrinter::doInitialization(Module &M) {
+ Mang = new Mangler(M, "", TAI->getPrivateGlobalPrefix());
+ return false; // success
+}
+
+bool SparcAsmPrinter::doFinalization(Module &M) {
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
+ printModuleLevelGV(I);
+
+ O << '\n';
+
+ return AsmPrinter::doFinalization(M);
+}
+
+void SparcAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+ return; // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ O << "\n\n";
+ std::string name = Mang->getValueName(GVar);
+ Constant *C = GVar->getInitializer();
+ unsigned Size = TD->getTypeAllocSize(C->getType());
+ unsigned Align = TD->getPreferredAlignment(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && !GVar->hasSection()) {
+ if (!GVar->isThreadLocal() &&
+ (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasLocalLinkage())
+ O << "\t.local " << name << '\n';
+
+ O << TAI->getCOMMDirective() << name << ',' << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (1 << Align);
+
+ O << '\n';
+ return;
+ }
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage: // FIXME: Verify correct for weak.
+ case GlobalValue::WeakODRLinkage: // FIXME: Verify correct for weak.
+ // Nonnull linkonce -> weak
+ O << "\t.weak " << name << '\n';
+ break;
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << TAI->getGlobalDirective() << name << '\n';
+ // FALL THROUGH
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ case GlobalValue::GhostLinkage:
+ cerr << "Should not have any unmaterialized functions!\n";
+ abort();
+ case GlobalValue::DLLImportLinkage:
+ cerr << "DLLImport linkage is not supported by this target!\n";
+ abort();
+ case GlobalValue::DLLExportLinkage:
+ cerr << "DLLExport linkage is not supported by this target!\n";
+ abort();
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, GVar);
+
+ if (TAI->hasDotTypeDotSizeDirective()) {
+ O << "\t.type " << name << ",#object\n";
+ O << "\t.size " << name << ',' << Size << '\n';
+ }
+
+ O << name << ":\n";
+ EmitGlobalConstant(C);
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo);
+
+ return false;
+}
+
+bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printMemOperand(MI, OpNo);
+ O << ']';
+
+ return false;
+}
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
new file mode 100644
index 000000000000..eefa7e8f4d9a
--- /dev/null
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -0,0 +1,23 @@
+set(LLVM_TARGET_DEFINITIONS Sparc.td)
+
+tablegen(SparcGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(SparcGenRegisterNames.inc -gen-register-enums)
+tablegen(SparcGenRegisterInfo.inc -gen-register-desc)
+tablegen(SparcGenInstrNames.inc -gen-instr-enums)
+tablegen(SparcGenInstrInfo.inc -gen-instr-desc)
+tablegen(SparcGenAsmWriter.inc -gen-asm-writer)
+tablegen(SparcGenDAGISel.inc -gen-dag-isel)
+tablegen(SparcGenSubtarget.inc -gen-subtarget)
+tablegen(SparcGenCallingConv.inc -gen-callingconv)
+
+add_llvm_target(SparcCodeGen
+ DelaySlotFiller.cpp
+ FPMover.cpp
+ SparcInstrInfo.cpp
+ SparcISelDAGToDAG.cpp
+ SparcISelLowering.cpp
+ SparcRegisterInfo.cpp
+ SparcSubtarget.cpp
+ SparcTargetAsmInfo.cpp
+ SparcTargetMachine.cpp
+ )
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
new file mode 100644
index 000000000000..15b26c29872f
--- /dev/null
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -0,0 +1,76 @@
+//===-- DelaySlotFiller.cpp - SPARC delay slot filler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a simple local pass that fills delay slots with NOPs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delayslotfiller"
+#include "Sparc.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "SPARC Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Sparc MachineFunctions
+///
+FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
+ return new Filler(tm);
+}
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
+///
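+/// A hedged illustration: after this pass, every delayed instruction (e.g. a
+/// CALL or a conditional branch) is immediately followed by an explicit NOP
+/// occupying its delay slot.
+///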
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator J = I;
+ ++J;
+ BuildMI(MBB, J, DebugLoc::getUnknownLoc(), TII->get(SP::NOP));
+ ++FilledSlots;
+ Changed = true;
+ }
+ return Changed;
+}
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
new file mode 100644
index 000000000000..f72a4c4645c1
--- /dev/null
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -0,0 +1,139 @@
+//===-- FPMover.cpp - Sparc double-precision floating point move fixer ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand FpMOVD/FpABSD/FpNEGD instructions into their single-precision pieces.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "fpmover"
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+STATISTIC(NumFpDs , "Number of instructions translated");
+STATISTIC(NoopFpDs, "Number of noop instructions removed");
+
+namespace {
+ struct FPMover : public MachineFunctionPass {
+ /// Target machine description which we query for reg. names, data
+ /// layout, etc.
+ ///
+ TargetMachine &TM;
+
+ static char ID;
+ explicit FPMover(TargetMachine &tm)
+ : MachineFunctionPass(&ID), TM(tm) { }
+
+ virtual const char *getPassName() const {
+ return "Sparc Double-FP Move Fixer";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F);
+ };
+ char FPMover::ID = 0;
+} // end of anonymous namespace
+
+/// createSparcFPMoverPass - Returns a pass that turns FpMOVD
+/// instructions into FMOVS instructions
+///
+FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) {
+ return new FPMover(tm);
+}
+
+/// getDoubleRegPair - Given a DFP register, return the even and odd FP
+/// registers that correspond to it.
+static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
+ unsigned &OddReg) {
+ static const unsigned EvenHalvesOfPairs[] = {
+ SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14,
+ SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30
+ };
+ static const unsigned OddHalvesOfPairs[] = {
+ SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15,
+ SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31
+ };
+ static const unsigned DoubleRegsInOrder[] = {
+ SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8,
+ SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15
+ };
+ for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i)
+ if (DoubleRegsInOrder[i] == DoubleReg) {
+ EvenReg = EvenHalvesOfPairs[i];
+ OddReg = OddHalvesOfPairs[i];
+ return;
+ }
+ assert(0 && "Can't find reg");
+}
+
+/// runOnMachineBasicBlock - Fixup FpMOVD instructions in this MBB.
+///
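+/// A hedged example of the expansion (register pairing D0 = F0:F1,
+/// D1 = F2:F3): copying D1 into D0 becomes FMOVS F2->F0 followed by
+/// FMOVS F3->F1.
+///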
+bool FPMover::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ MachineInstr *MI = I++;
+ DebugLoc dl = MI->getDebugLoc();
+ if (MI->getOpcode() == SP::FpMOVD || MI->getOpcode() == SP::FpABSD ||
+ MI->getOpcode() == SP::FpNEGD) {
+ Changed = true;
+ unsigned DestDReg = MI->getOperand(0).getReg();
+ unsigned SrcDReg = MI->getOperand(1).getReg();
+ if (DestDReg == SrcDReg && MI->getOpcode() == SP::FpMOVD) {
+ MBB.erase(MI); // Eliminate the noop copy.
+ ++NoopFpDs;
+ continue;
+ }
+
+ unsigned EvenSrcReg = 0, OddSrcReg = 0, EvenDestReg = 0, OddDestReg = 0;
+ getDoubleRegPair(DestDReg, EvenDestReg, OddDestReg);
+ getDoubleRegPair(SrcDReg, EvenSrcReg, OddSrcReg);
+
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (MI->getOpcode() == SP::FpMOVD)
+ MI->setDesc(TII->get(SP::FMOVS));
+ else if (MI->getOpcode() == SP::FpNEGD)
+ MI->setDesc(TII->get(SP::FNEGS));
+ else if (MI->getOpcode() == SP::FpABSD)
+ MI->setDesc(TII->get(SP::FABSS));
+ else
+ assert(0 && "Unknown opcode!");
+
+ MI->getOperand(0).setReg(EvenDestReg);
+ MI->getOperand(1).setReg(EvenSrcReg);
+ DOUT << "FPMover: the modified instr is: " << *MI;
+ // Insert copy for the other half of the double.
+ if (DestDReg != SrcDReg) {
+ MI = BuildMI(MBB, I, dl, TM.getInstrInfo()->get(SP::FMOVS), OddDestReg)
+ .addReg(OddSrcReg);
+ DOUT << "FPMover: the inserted instr is: " << *MI;
+ }
+ ++NumFpDs;
+ }
+ }
+ return Changed;
+}
+
+bool FPMover::runOnMachineFunction(MachineFunction &F) {
+ // If the target has V9 instructions, the fp-mover pseudos will never be
+ // emitted. Avoid a scan of the instructions to improve compile time.
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ return false;
+
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+}
diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile
new file mode 100644
index 000000000000..fdf6afaee076
--- /dev/null
+++ b/lib/Target/Sparc/Makefile
@@ -0,0 +1,22 @@
+##===- lib/Target/Sparc/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMSparcCodeGen
+TARGET = Sparc
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \
+ SparcGenRegisterInfo.inc SparcGenInstrNames.inc \
+ SparcGenInstrInfo.inc SparcGenAsmWriter.inc \
+ SparcGenDAGISel.inc SparcGenSubtarget.inc SparcGenCallingConv.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt
new file mode 100644
index 000000000000..cc24abf7286f
--- /dev/null
+++ b/lib/Target/Sparc/README.txt
@@ -0,0 +1,58 @@
+
+To-do
+-----
+
+* Keep the address of the constant pool in a register instead of forming its
+ address all of the time.
+* We can fold small constant offsets into the %hi/%lo references to constant
+ pool addresses as well.
+* When in V9 mode, register allocate %icc[0-3].
+* Add support for isel'ing UMUL_LOHI instead of marking it as Expand.
+* Emit the 'Branch on Integer Register with Prediction' instructions. It's
+ not clear how to write a pattern for this though:
+
+float %t1(int %a, int* %p) {
+ %C = seteq int %a, 0
+ br bool %C, label %T, label %F
+T:
+ store int 123, int* %p
+ br label %F
+F:
+ ret float undef
+}
+
+codegens to this:
+
+t1:
+ save -96, %o6, %o6
+1) subcc %i0, 0, %l0
+1) bne .LBBt1_2 ! F
+ nop
+.LBBt1_1: ! T
+ or %g0, 123, %l0
+ st %l0, [%i1]
+.LBBt1_2: ! F
+ restore %g0, %g0, %g0
+ retl
+ nop
+
+1) should be replaced with a brz in V9 mode.
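+
+A hedged sketch of what 1) could become with a V9 branch-on-register
+instruction (brnz here, since the existing code branches to F when %i0 is
+nonzero; same register assignment and labels as above):
+
+ brnz %i0, .LBBt1_2 ! F
+ nop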
+
+* Same as above, but emit conditional move on register zero (p192) in V9
+ mode. Testcase:
+
+int %t1(int %a, int %b) {
+ %C = seteq int %a, 0
+ %D = select bool %C, int %a, int %b
+ ret int %D
+}
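+
+A hedged sketch of the V9 sequence this asks for (purely illustrative; assume
+%a in %i0, %b in %i1, and the result produced in %o0):
+
+ mov %i1, %o0 ! result = b
+ movrz %i0, %g0, %o0 ! if a == 0, result = 0, which equals a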
+
+* Emit MULX/[SU]DIVX instructions in V9 mode instead of fiddling
+ with the Y register, if they are faster.
+
+* Codegen bswap(load)/store(bswap) -> load/store ASI
+
+* Implement frame pointer elimination, e.g. eliminate save/restore for
+ leaf fns.
+* Fill delay slots
+
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
new file mode 100644
index 000000000000..bb03f30f2dd6
--- /dev/null
+++ b/lib/Target/Sparc/Sparc.h
@@ -0,0 +1,119 @@
+//===-- Sparc.h - Top-level interface for Sparc representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Sparc back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_SPARC_H
+#define TARGET_SPARC_H
+
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+ class FunctionPass;
+ class SparcTargetMachine;
+ class raw_ostream;
+
+ FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
+ FunctionPass *createSparcCodePrinterPass(raw_ostream &OS, TargetMachine &TM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+ FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createSparcFPMoverPass(TargetMachine &TM);
+} // end namespace llvm;
+
+// Defines symbolic names for Sparc registers. This defines a mapping from
+// register name to register number.
+//
+#include "SparcGenRegisterNames.inc"
+
+// Defines symbolic names for the Sparc instructions.
+//
+#include "SparcGenInstrNames.inc"
+
+
+namespace llvm {
+ // Enums corresponding to Sparc condition codes, both icc's and fcc's. These
+ // values must be kept in sync with the ones in the .td file.
+ namespace SPCC {
+ enum CondCodes {
+ //ICC_A = 8 , // Always
+ //ICC_N = 0 , // Never
+ ICC_NE = 9 , // Not Equal
+ ICC_E = 1 , // Equal
+ ICC_G = 10 , // Greater
+ ICC_LE = 2 , // Less or Equal
+ ICC_GE = 11 , // Greater or Equal
+ ICC_L = 3 , // Less
+ ICC_GU = 12 , // Greater Unsigned
+ ICC_LEU = 4 , // Less or Equal Unsigned
+ ICC_CC = 13 , // Carry Clear/Great or Equal Unsigned
+ ICC_CS = 5 , // Carry Set/Less Unsigned
+ ICC_POS = 14 , // Positive
+ ICC_NEG = 6 , // Negative
+ ICC_VC = 15 , // Overflow Clear
+ ICC_VS = 7 , // Overflow Set
+
+ //FCC_A = 8+16, // Always
+ //FCC_N = 0+16, // Never
+ FCC_U = 7+16, // Unordered
+ FCC_G = 6+16, // Greater
+ FCC_UG = 5+16, // Unordered or Greater
+ FCC_L = 4+16, // Less
+ FCC_UL = 3+16, // Unordered or Less
+ FCC_LG = 2+16, // Less or Greater
+ FCC_NE = 1+16, // Not Equal
+ FCC_E = 9+16, // Equal
+ FCC_UE = 10+16, // Unordered or Equal
+ FCC_GE = 11+16, // Greater or Equal
+ FCC_UGE = 12+16, // Unordered or Greater or Equal
+ FCC_LE = 13+16, // Less or Equal
+ FCC_ULE = 14+16, // Unordered or Less or Equal
+ FCC_O = 15+16 // Ordered
+ };
+ }
+
+ inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case SPCC::ICC_NE: return "ne";
+ case SPCC::ICC_E: return "e";
+ case SPCC::ICC_G: return "g";
+ case SPCC::ICC_LE: return "le";
+ case SPCC::ICC_GE: return "ge";
+ case SPCC::ICC_L: return "l";
+ case SPCC::ICC_GU: return "gu";
+ case SPCC::ICC_LEU: return "leu";
+ case SPCC::ICC_CC: return "cc";
+ case SPCC::ICC_CS: return "cs";
+ case SPCC::ICC_POS: return "pos";
+ case SPCC::ICC_NEG: return "neg";
+ case SPCC::ICC_VC: return "vc";
+ case SPCC::ICC_VS: return "vs";
+ case SPCC::FCC_U: return "u";
+ case SPCC::FCC_G: return "g";
+ case SPCC::FCC_UG: return "ug";
+ case SPCC::FCC_L: return "l";
+ case SPCC::FCC_UL: return "ul";
+ case SPCC::FCC_LG: return "lg";
+ case SPCC::FCC_NE: return "ne";
+ case SPCC::FCC_E: return "e";
+ case SPCC::FCC_UE: return "ue";
+ case SPCC::FCC_GE: return "ge";
+ case SPCC::FCC_UGE: return "uge";
+ case SPCC::FCC_LE: return "le";
+ case SPCC::FCC_ULE: return "ule";
+ case SPCC::FCC_O: return "o";
+ }
+ }
+} // end namespace llvm
+#endif
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
new file mode 100644
index 000000000000..53ea8f4a35f0
--- /dev/null
+++ b/lib/Target/Sparc/Sparc.td
@@ -0,0 +1,76 @@
+//===- Sparc.td - Describe the Sparc Target Machine -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// SPARC Subtarget features.
+//
+
+def FeatureV9
+ : SubtargetFeature<"v9", "IsV9", "true",
+ "Enable SPARC-V9 instructions">;
+def FeatureV8Deprecated
+ : SubtargetFeature<"deprecated-v8", "V8DeprecatedInsts", "true",
+ "Enable deprecated V8 instructions in V9 mode">;
+def FeatureVIS
+ : SubtargetFeature<"vis", "IsVIS", "true",
+ "Enable UltraSPARC Visual Instruction Set extensions">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "SparcRegisterInfo.td"
+include "SparcCallingConv.td"
+include "SparcInstrInfo.td"
+
+def SparcInstrInfo : InstrInfo {
+ // Define how we want to layout our target-specific information field.
+ let TSFlagsFields = [];
+ let TSFlagsShifts = [];
+}
+
+//===----------------------------------------------------------------------===//
+// SPARC processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"v8", []>;
+def : Proc<"supersparc", []>;
+def : Proc<"sparclite", []>;
+def : Proc<"f934", []>;
+def : Proc<"hypersparc", []>;
+def : Proc<"sparclite86x", []>;
+def : Proc<"sparclet", []>;
+def : Proc<"tsc701", []>;
+def : Proc<"v9", [FeatureV9]>;
+def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Sparc : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = SparcInstrInfo;
+}
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
new file mode 100644
index 000000000000..33ecfdf5f750
--- /dev/null
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -0,0 +1,32 @@
+//===- SparcCallingConv.td - Calling Conventions Sparc -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Sparc architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Sparc 32-bit C return-value convention.
+def RetCC_Sparc32 : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
+// Sparc 32-bit C Calling convention.
+def CC_Sparc32 : CallingConv<[
+ // All arguments get passed in integer registers if there is space.
+ CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
new file mode 100644
index 000000000000..c9bd62d0e20d
--- /dev/null
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -0,0 +1,215 @@
+//===-- SparcISelDAGToDAG.cpp - A dag to dag inst selector for Sparc ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SPARC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcISelLowering.h"
+#include "SparcTargetMachine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// SparcDAGToDAGISel - SPARC specific code to select SPARC machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class SparcDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const SparcSubtarget &Subtarget;
+public:
+ explicit SparcDAGToDAGISel(SparcTargetMachine &TM)
+ : SelectionDAGISel(TM),
+ Subtarget(TM.getSubtarget<SparcSubtarget>()) {
+ }
+
+ SDNode *Select(SDValue Op);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRrr(SDValue Op, SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDRri(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Offset);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ /// InstructionSelect - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "SPARC DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+#include "SparcGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void SparcDAGToDAGISel::InstructionSelect() {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+ CurDAG->RemoveDeadNodes();
+}
+
+bool SparcDAGToDAGISel::SelectADDRri(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (Predicate_simm13(CN)) {
+ if (FrameIndexSDNode *FIN =
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
+ // Constant offset from frame ref.
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ } else {
+ Base = Addr.getOperand(0);
+ }
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(1);
+ Offset = Addr.getOperand(0).getOperand(0);
+ return true;
+ }
+ if (Addr.getOperand(1).getOpcode() == SPISD::Lo) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1).getOperand(0);
+ return true;
+ }
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool SparcDAGToDAGISel::SelectADDRrr(SDValue Op, SDValue Addr,
+ SDValue &R1, SDValue &R2) {
+ if (Addr.getOpcode() == ISD::FrameIndex) return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (isa<ConstantSDNode>(Addr.getOperand(1)) &&
+ Predicate_simm13(Addr.getOperand(1).getNode()))
+ return false; // Let the reg+imm pattern catch this!
+ if (Addr.getOperand(0).getOpcode() == SPISD::Lo ||
+ Addr.getOperand(1).getOpcode() == SPISD::Lo)
+ return false; // Let the reg+imm pattern catch this!
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ return true;
+ }
+
+ R1 = Addr;
+ R2 = CurDAG->getRegister(SP::G0, MVT::i32);
+ return true;
+}
+
+SDNode *SparcDAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::SDIV:
+ case ISD::UDIV: {
+ // FIXME: should use a custom expander to expose the SRA to the dag.
+ SDValue DivLHS = N->getOperand(0);
+ SDValue DivRHS = N->getOperand(1);
+
+ // Set the Y register to the high-part.
+ SDValue TopPart;
+ if (N->getOpcode() == ISD::SDIV) {
+ TopPart = SDValue(CurDAG->getTargetNode(SP::SRAri, dl, MVT::i32, DivLHS,
+ CurDAG->getTargetConstant(31, MVT::i32)), 0);
+ } else {
+ TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
+ }
+ TopPart = SDValue(CurDAG->getTargetNode(SP::WRYrr, dl, MVT::Flag, TopPart,
+ CurDAG->getRegister(SP::G0, MVT::i32)), 0);
+
+ // FIXME: Handle div by immediate.
+ unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr;
+ return CurDAG->SelectNodeTo(N, Opcode, MVT::i32, DivLHS, DivRHS,
+ TopPart);
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ // FIXME: Handle mul by immediate.
+ SDValue MulLHS = N->getOperand(0);
+ SDValue MulRHS = N->getOperand(1);
+ unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
+ SDNode *Mul = CurDAG->getTargetNode(Opcode, dl, MVT::i32, MVT::Flag,
+ MulLHS, MulRHS);
+ // The high part is in the Y register.
+ return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
+ }
+ }
+
+ return SelectCode(Op);
+}
+
+
+/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+/// inline asm expressions.
+bool
+SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectADDRrr(Op, Op, Op0, Op1))
+ SelectADDRri(Op, Op, Op0, Op1);
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
+
+/// createSparcISelDag - This pass converts a legalized DAG into a
+/// SPARC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSparcISelDag(SparcTargetMachine &TM) {
+ return new SparcDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
new file mode 100644
index 000000000000..3ec7e06f0985
--- /dev/null
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -0,0 +1,1049 @@
+//===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcISelLowering.h"
+#include "SparcTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "SparcGenCallingConv.inc"
+
+static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CC, isVarArg, DAG.getTarget(), RVLocs);
+
+ // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_Sparc32);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // ISD::RET => ret chain, (regnum1,val1), ...
+ // So i*2+1 index only the regnums.
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Op.getOperand(i*2+1), Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+/// LowerArguments - V8 uses a very simple ABI, where all values are passed in
+/// either one or two GPRs, including FP values. TODO: we should pass FP values
+/// in FP registers for fastcc functions.
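+/// (A hedged illustration: an f64 argument arrives split across two
+/// consecutive integer registers, e.g. %i0/%i1, high word first, since
+/// SPARC is big-endian.)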
+void
+SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &ArgValues,
+ DebugLoc dl) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ static const unsigned ArgRegs[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+
+ const unsigned *CurArgReg = ArgRegs, *ArgRegEnd = ArgRegs+6;
+ unsigned ArgOffset = 68;
+
+ SDValue Root = DAG.getRoot();
+ std::vector<SDValue> OutChains;
+
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
+ MVT ObjectVT = getValueType(I->getType());
+
+ switch (ObjectVT.getSimpleVT()) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ if (I->use_empty()) { // Argument is dead.
+ if (CurArgReg < ArgRegEnd) ++CurArgReg;
+ ArgValues.push_back(DAG.getUNDEF(ObjectVT));
+ } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg++, VReg);
+ SDValue Arg = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+ if (ObjectVT != MVT::i32) {
+ unsigned AssertOp = ISD::AssertSext;
+ Arg = DAG.getNode(AssertOp, dl, MVT::i32, Arg,
+ DAG.getValueType(ObjectVT));
+ Arg = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Arg);
+ }
+ ArgValues.push_back(Arg);
+ } else {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ SDValue Load;
+ if (ObjectVT == MVT::i32) {
+ Load = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0);
+ } else {
+ ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
+
+ // Sparc is big endian, so add an offset based on the ObjectVT.
+ unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8);
+ FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
+ DAG.getConstant(Offset, MVT::i32));
+ Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Root, FIPtr,
+ NULL, 0, ObjectVT);
+ Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load);
+ }
+ ArgValues.push_back(Load);
+ }
+
+ ArgOffset += 4;
+ break;
+ case MVT::f32:
+ if (I->use_empty()) { // Argument is dead.
+ if (CurArgReg < ArgRegEnd) ++CurArgReg;
+ ArgValues.push_back(DAG.getUNDEF(ObjectVT));
+ } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
+ // FP value is passed in an integer register.
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg++, VReg);
+ SDValue Arg = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg);
+ ArgValues.push_back(Arg);
+ } else {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ SDValue Load = DAG.getLoad(MVT::f32, dl, Root, FIPtr, NULL, 0);
+ ArgValues.push_back(Load);
+ }
+ ArgOffset += 4;
+ break;
+
+ case MVT::i64:
+ case MVT::f64:
+ if (I->use_empty()) { // Argument is dead.
+ if (CurArgReg < ArgRegEnd) ++CurArgReg;
+ if (CurArgReg < ArgRegEnd) ++CurArgReg;
+ ArgValues.push_back(DAG.getUNDEF(ObjectVT));
+ } else {
+ SDValue HiVal;
+ if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
+ unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi);
+ HiVal = DAG.getCopyFromReg(Root, dl, VRegHi, MVT::i32);
+ } else {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ HiVal = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0);
+ }
+
+ SDValue LoVal;
+ if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR
+ unsigned VRegLo = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo);
+ LoVal = DAG.getCopyFromReg(Root, dl, VRegLo, MVT::i32);
+ } else {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+ LoVal = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0);
+ }
+
+ // Compose the two halves together into an i64 unit.
+ SDValue WholeValue =
+ DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+
+ // If we want a double, do a bit convert.
+ if (ObjectVT == MVT::f64)
+ WholeValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, WholeValue);
+
+ ArgValues.push_back(WholeValue);
+ }
+ ArgOffset += 8;
+ break;
+ }
+ }
+
+ // Store remaining ArgRegs to the stack if this is a varargs function.
+ if (F.isVarArg()) {
+ // Remember the vararg offset for the va_start implementation.
+ VarArgsFrameOffset = ArgOffset;
+
+ for (; CurArgReg != ArgRegEnd; ++CurArgReg) {
+ unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+ MF.getRegInfo().addLiveIn(*CurArgReg, VReg);
+ SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
+
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+
+ OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0));
+ ArgOffset += 4;
+ }
+ }
+
+ if (!OutChains.empty())
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size()));
+}
+
+static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) {
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ unsigned CallingConv = TheCall->getCallingConv();
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ bool isVarArg = TheCall->isVarArg();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+#if 0
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallingConv, isVarArg, DAG.getTarget(), ArgLocs);
+ CCInfo.AnalyzeCallOperands(Op.getNode(), CC_Sparc32);
+
+ // Get the size of the outgoing arguments stack space requirement.
+ unsigned ArgsSize = CCInfo.getNextStackOffset();
+ // FIXME: We can't use this until f64 is known to take two GPRs.
+#else
+ (void)CC_Sparc32;
+
+ // Count the size of the outgoing arguments.
+ unsigned ArgsSize = 0;
+ for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) {
+ switch (TheCall->getArg(i).getValueType().getSimpleVT()) {
+ default: assert(0 && "Unknown value type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::f32:
+ ArgsSize += 4;
+ break;
+ case MVT::i64:
+ case MVT::f64:
+ ArgsSize += 8;
+ break;
+ }
+ }
+ if (ArgsSize > 4*6)
+ ArgsSize -= 4*6; // Space for first 6 arguments is prereserved.
+ else
+ ArgsSize = 0;
+#endif
+
+ // Keep stack frames 8-byte aligned.
+ ArgsSize = (ArgsSize+7) & ~7;
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+#if 0
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments start after the 5 first operands of ISD::CALL
+ SDValue Arg = TheCall->getArg(i);
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed on register must be kept at
+ // RegsToPass vector
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ assert(VA.isMemLoc());
+
+ // Create a store off the stack pointer for this argument.
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ // FIXME: VERIFY THAT 68 IS RIGHT.
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68);
+ PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ }
+
+#else
+ static const unsigned ArgRegs[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+ unsigned ArgOffset = 68;
+
+ for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) {
+ SDValue Val = TheCall->getArg(i);
+ MVT ObjectVT = Val.getValueType();
+ SDValue ValToStore(0, 0);
+ unsigned ObjSize;
+ switch (ObjectVT.getSimpleVT()) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i32:
+ ObjSize = 4;
+
+ if (RegsToPass.size() >= 6) {
+ ValToStore = Val;
+ } else {
+ RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val));
+ }
+ break;
+ case MVT::f32:
+ ObjSize = 4;
+ if (RegsToPass.size() >= 6) {
+ ValToStore = Val;
+ } else {
+ // Convert this to a FP value in an int reg.
+ Val = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Val);
+ RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val));
+ }
+ break;
+ case MVT::f64: {
+ ObjSize = 8;
+ if (RegsToPass.size() >= 6) {
+ ValToStore = Val; // Whole thing is passed in memory.
+ break;
+ }
+
+ // Break into top and bottom parts by storing to the stack and loading
+ // out the parts as integers. Top part goes in a reg.
+ SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Val, StackPtr, NULL, 0);
+ // Sparc is big-endian, so the high part comes first.
+ SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0);
+ // Increment the pointer to the other half.
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getIntPtrConstant(4));
+ // Load the low part.
+ SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0);
+
+ RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));
+
+ if (RegsToPass.size() >= 6) {
+ ValToStore = Lo;
+ ArgOffset += 4;
+ ObjSize = 4;
+ } else {
+ RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo));
+ }
+ break;
+ }
+ case MVT::i64: {
+ ObjSize = 8;
+ if (RegsToPass.size() >= 6) {
+ ValToStore = Val; // Whole thing is passed in memory.
+ break;
+ }
+
+ // Split the value into top and bottom part. Top part goes in a reg.
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
+ DAG.getConstant(1, MVT::i32));
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
+ DAG.getConstant(0, MVT::i32));
+ RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));
+
+ if (RegsToPass.size() >= 6) {
+ ValToStore = Lo;
+ ArgOffset += 4;
+ ObjSize = 4;
+ } else {
+ RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo));
+ }
+ break;
+ }
+ }
+
+ if (ValToStore.getNode()) {
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+ SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32);
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore,
+ PtrOff, NULL, 0));
+ }
+ ArgOffset += ObjSize;
+ }
+#endif
+
+ // Emit all stores, make sure they occur before any copies into physregs.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ unsigned Reg = RegsToPass[i].first;
+ // Remap I0->I7 -> O0->O7.
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ std::vector<MVT> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+ SDValue Ops[] = { Chain, Callee, InFlag };
+ Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops, InFlag.getNode() ? 3 : 2);
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RVInfo(CallingConv, isVarArg, DAG.getTarget(), RVLocs);
+
+ RVInfo.AnalyzeCallResult(TheCall, RetCC_Sparc32);
+ SmallVector<SDValue, 8> ResultVals;
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ unsigned Reg = RVLocs[i].getLocReg();
+
+ // Remap I0->I7 -> O0->O7.
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ Reg = Reg-SP::I0+SP::O0;
+
+ Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ ResultVals.push_back(Chain.getValue(0));
+ }
+
+ ResultVals.push_back(Chain);
+
+ // Merge everything together with a MERGE_VALUES node.
+ return DAG.getNode(ISD::MERGE_VALUES, dl,
+ TheCall->getVTList(), &ResultVals[0],
+ ResultVals.size());
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+/// IntCondCCodeToICC - Convert a DAG integer condition code to a SPARC ICC
+/// condition.
+static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown integer condition code!");
+ case ISD::SETEQ: return SPCC::ICC_E;
+ case ISD::SETNE: return SPCC::ICC_NE;
+ case ISD::SETLT: return SPCC::ICC_L;
+ case ISD::SETGT: return SPCC::ICC_G;
+ case ISD::SETLE: return SPCC::ICC_LE;
+ case ISD::SETGE: return SPCC::ICC_GE;
+ case ISD::SETULT: return SPCC::ICC_CS;
+ case ISD::SETULE: return SPCC::ICC_LEU;
+ case ISD::SETUGT: return SPCC::ICC_GU;
+ case ISD::SETUGE: return SPCC::ICC_CC;
+ }
+}
+
+/// FPCondCCodeToFCC - Convert a DAG floating point condition code to a SPARC
+/// FCC condition.
+static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return SPCC::FCC_E;
+ case ISD::SETNE:
+ case ISD::SETUNE: return SPCC::FCC_NE;
+ case ISD::SETLT:
+ case ISD::SETOLT: return SPCC::FCC_L;
+ case ISD::SETGT:
+ case ISD::SETOGT: return SPCC::FCC_G;
+ case ISD::SETLE:
+ case ISD::SETOLE: return SPCC::FCC_LE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return SPCC::FCC_GE;
+ case ISD::SETULT: return SPCC::FCC_UL;
+ case ISD::SETULE: return SPCC::FCC_ULE;
+ case ISD::SETUGT: return SPCC::FCC_UG;
+ case ISD::SETUGE: return SPCC::FCC_UGE;
+ case ISD::SETUO: return SPCC::FCC_U;
+ case ISD::SETO: return SPCC::FCC_O;
+ case ISD::SETONE: return SPCC::FCC_LG;
+ case ISD::SETUEQ: return SPCC::FCC_UE;
+ }
+}
+
+
+SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
+ addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
+ addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass);
+
+ // Turn FP extload into load/fextend
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ // Sparc doesn't have i1 sign extending load
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into LO/HI parts.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool , MVT::i32, Custom);
+
+ // Sparc doesn't have sext_inreg, replace them with shl/sra
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // Sparc has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // Custom expand fp<->sint
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+
+ // Expand fp<->uint
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+
+ // Sparc has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::i32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f32, Expand);
+ setOperationAction(ISD::SETCC, MVT::f64, Expand);
+
+ // Sparc doesn't have BRCOND either, it has BR_CC.
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // SPARC has no intrinsics for these particular operations.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // FIXME: Sparc provides these multiplies, but we don't have them yet.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+ // We don't have line number support yet.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // RET must be custom lowered, to meet ABI requirements
+ setOperationAction(ISD::RET , MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ // VAARG needs to be lowered to not do unaligned accesses for doubles.
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+
+ // No debug info support yet.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::DECLARE, MVT::Other, Expand);
+
+ setStackPointerRegisterToSaveRestore(SP::O6);
+
+ if (TM.getSubtarget<SparcSubtarget>().isV9())
+ setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+
+ computeRegisterProperties();
+}
+
+const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case SPISD::CMPICC: return "SPISD::CMPICC";
+ case SPISD::CMPFCC: return "SPISD::CMPFCC";
+ case SPISD::BRICC: return "SPISD::BRICC";
+ case SPISD::BRFCC: return "SPISD::BRFCC";
+ case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
+ case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
+ case SPISD::Hi: return "SPISD::Hi";
+ case SPISD::Lo: return "SPISD::Lo";
+ case SPISD::FTOI: return "SPISD::FTOI";
+ case SPISD::ITOF: return "SPISD::ITOF";
+ case SPISD::CALL: return "SPISD::CALL";
+ case SPISD::RET_FLAG: return "SPISD::RET_FLAG";
+ }
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified in
+/// Mask are known to be either zero or one and return them in the
+/// KnownZero/KnownOne bitsets. Op is expected to be a target specific node.
+/// Used by the DAG combiner.
+void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ APInt KnownZero2, KnownOne2;
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case SPISD::SELECT_ICC:
+ case SPISD::SELECT_FCC:
+ DAG.ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne,
+ Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2,
+ Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ }
+}
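
The SELECT_ICC/SELECT_FCC case above keeps only the facts that hold on both paths of the select. A stand-alone sketch of that intersection rule, using plain integers in place of APInt (illustration only, not part of this patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Operand 0: low four bits known to be zero.  Operand 1: low two bits
      // known zero, bit 31 known one.
      uint32_t KnownZero0 = 0x0000000F, KnownOne0 = 0x00000000;
      uint32_t KnownZero1 = 0x00000003, KnownOne1 = 0x80000000;
      // After the select, only bits known in both operands remain known.
      uint32_t KnownZero = KnownZero0 & KnownZero1;   // 0x00000003
      uint32_t KnownOne  = KnownOne0  & KnownOne1;    // 0x00000000
      assert(KnownZero == 0x00000003 && KnownOne == 0x00000000);
      return 0;
    }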
+
+// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so,
+// set LHS/RHS to the LHS/RHS of the setcc and SPCC to the condition.
+static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
+ ISD::CondCode CC, unsigned &SPCC) {
+ if (isa<ConstantSDNode>(RHS) &&
+ cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+ CC == ISD::SETNE &&
+ ((LHS.getOpcode() == SPISD::SELECT_ICC &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
+ (LHS.getOpcode() == SPISD::SELECT_FCC &&
+ LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
+ isa<ConstantSDNode>(LHS.getOperand(0)) &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) {
+ SDValue CMPCC = LHS.getOperand(3);
+ SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
+ LHS = CMPCC.getOperand(0);
+ RHS = CMPCC.getOperand(1);
+ }
+}
+
+static SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) {
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
+ SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+}
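
As a source-level illustration (not part of this patch), taking the address of a global is what exercises this path; the Hi/Lo pair above is expected to be matched into a sethi of the upper 22 bits followed by an or of the low 10 bits:

    // Hypothetical example: the address of GlobalCounter is formed from the
    // SPISD::Hi and SPISD::Lo nodes produced by LowerGLOBALADDRESS above.
    int GlobalCounter;
    int *globalCounterAddr() { return &GlobalCounter; }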
+
+static SDValue LowerCONSTANTPOOL(SDValue Op, SelectionDAG &DAG) {
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ Constant *C = N->getConstVal();
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
+ SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
+ SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
+}
+
+static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Convert the fp value to integer in an FP register.
+ assert(Op.getValueType() == MVT::i32);
+ Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+}
+
+static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert(Op.getOperand(0).getValueType() == MVT::i32);
+ SDValue Tmp = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
+ // Convert the int value to FP in an FP register.
+ return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp);
+}
+
+static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc, SPCC = ~0U;
+
+ // If this is a br_cc of a "setcc", and if the setcc got lowered into
+ // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ // Get the condition flag.
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ std::vector<MVT> VTs;
+ VTs.push_back(MVT::i32);
+ VTs.push_back(MVT::Flag);
+ SDValue Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ Opc = SPISD::BRICC;
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS);
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ Opc = SPISD::BRFCC;
+ }
+ return DAG.getNode(Opc, dl, MVT::Other, Chain, Dest,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc, SPCC = ~0U;
+
+ // If this is a select_cc of a "setcc", and if the setcc got lowered into
+ // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
+ LookThroughSetCC(LHS, RHS, CC, SPCC);
+
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ std::vector<MVT> VTs;
+ VTs.push_back(LHS.getValueType()); // subcc returns a value
+ VTs.push_back(MVT::Flag);
+ SDValue Ops[2] = { LHS, RHS };
+ CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
+ Opc = SPISD::SELECT_ICC;
+ if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS);
+ Opc = SPISD::SELECT_FCC;
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ }
+ return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+ DAG.getConstant(SPCC, MVT::i32), CompareFlag);
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ SparcTargetLowering &TLI) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Offset = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getRegister(SP::I6, MVT::i32),
+ DAG.getConstant(TLI.getVarArgsFrameOffset(),
+ MVT::i32));
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0);
+}
+
+static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ MVT VT = Node->getValueType(0);
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ DebugLoc dl = Node->getDebugLoc();
+ SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0);
+ // Increment the pointer, VAList, to the next vaarg
+ SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
+ DAG.getConstant(VT.getSizeInBits()/8,
+ MVT::i32));
+ // Store the incremented VAList to the legalized pointer
+ InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
+ VAListPtr, SV, 0);
+ // Load the actual argument out of the pointer VAList, unless this is an
+ // f64 load.
+ if (VT != MVT::f64)
+ return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0);
+
+ // Otherwise, load it as i64, then do a bitconvert.
+ SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0);
+
+ // Bit-Convert the value to f64.
+ SDValue Ops[2] = {
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, V),
+ V.getValue(1)
+ };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
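
A source-level sketch (illustration only) of what this lowering services: each va_arg step advances the va_list by the argument size, and va_arg of a double takes the i64-load-plus-bitconvert path above because the argument area is only word aligned on 32-bit SPARC:

    #include <cstdarg>

    // Sums Count doubles passed through varargs; each va_arg(Ap, double)
    // is the f64 case that LowerVAARG handles with an i64 load and a
    // bit-convert back to f64.
    double sumDoubles(int Count, ...) {
      va_list Ap;
      va_start(Ap, Count);
      double Sum = 0.0;
      for (int i = 0; i < Count; ++i)
        Sum += va_arg(Ap, double);
      va_end(Ap);
      return Sum;
    }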
+
+static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Chain = Op.getOperand(0); // Legalize the chain.
+ SDValue Size = Op.getOperand(1); // Legalize the size.
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned SPReg = SP::O6;
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+ SDValue NewSP = DAG.getNode(ISD::SUB, dl, MVT::i32, SP, Size); // Value
+ Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
+
+ // The resultant pointer is actually 96 bytes (24 words) above the bottom of
+ // the stack, to provide a register spill area.
+ SDValue NewVal = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
+ DAG.getConstant(96, MVT::i32));
+ SDValue Ops[2] = { NewVal, Chain };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+
+SDValue SparcTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ // Frame & Return address. Currently unimplemented
+ case ISD::RETURNADDR: return SDValue();
+ case ISD::FRAMEADDR: return SDValue();
+ case ISD::GlobalTLSAddress:
+ assert(0 && "TLS not implemented for Sparc.");
+ case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::ConstantPool: return LowerCONSTANTPOOL(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ }
+}
+
+MachineBasicBlock *
+SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ unsigned BROpcode;
+ unsigned CC;
+ DebugLoc dl = MI->getDebugLoc();
+ // Figure out the conditional branch opcode to use for this select_cc.
+ switch (MI->getOpcode()) {
+ default: assert(0 && "Unknown SELECT_CC!");
+ case SP::SELECT_CC_Int_ICC:
+ case SP::SELECT_CC_FP_ICC:
+ case SP::SELECT_CC_DFP_ICC:
+ BROpcode = SP::BCOND;
+ break;
+ case SP::SELECT_CC_Int_FCC:
+ case SP::SELECT_CC_FP_FCC:
+ case SP::SELECT_CC_DFP_FCC:
+ BROpcode = SP::FBCOND;
+ break;
+ }
+
+ CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // [f]bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ sinkMBB->transferSuccessors(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII.get(SP::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Sparc Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SparcTargetLowering::ConstraintType
+SparcTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ }
+ }
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, SP::IntRegsRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+std::vector<unsigned> SparcTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) {
+ default: break;
+ case 'r':
+ return make_vector<unsigned>(SP::L0, SP::L1, SP::L2, SP::L3,
+ SP::L4, SP::L5, SP::L6, SP::L7,
+ SP::I0, SP::I1, SP::I2, SP::I3,
+ SP::I4, SP::I5,
+ SP::O0, SP::O1, SP::O2, SP::O3,
+ SP::O4, SP::O5, SP::O7, 0);
+ }
+
+ return std::vector<unsigned>();
+}
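
For reference, a source-level use of the single-letter 'r' constraint handled by the two hooks above (illustration only; the register allocator may pick any of the integer registers listed):

    // GCC-style extended asm using the 'r' constraint on SPARC.
    int addViaInlineAsm(int A, int B) {
      int Result;
      asm("add %1, %2, %0" : "=r"(Result) : "r"(A), "r"(B));
      return Result;
    }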
+
+bool
+SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The Sparc target isn't yet aware of offsets.
+ return false;
+}
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
new file mode 100644
index 000000000000..fe6811f8c370
--- /dev/null
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -0,0 +1,79 @@
+//===-- SparcISelLowering.h - Sparc DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Sparc uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_ISELLOWERING_H
+#define SPARC_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "Sparc.h"
+
+namespace llvm {
+ namespace SPISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_FCC, // Select between two values using the current FCC flags.
+
+ Hi, Lo, // Hi/Lo operations, typically on a global address.
+
+ FTOI, // FP to Int within a FP register.
+ ITOF, // Int to FP within a FP register.
+
+ CALL, // A call instruction.
+ RET_FLAG // Return with a flag operand.
+ };
+ }
+
+ class SparcTargetLowering : public TargetLowering {
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+ public:
+ SparcTargetLowering(TargetMachine &TM);
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual void LowerArguments(Function &F, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &ArgValues,
+ DebugLoc dl);
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ };
+} // end namespace llvm
+
+#endif // SPARC_ISELLOWERING_H
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
new file mode 100644
index 000000000000..6535259e16ff
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -0,0 +1,114 @@
+//===- SparcInstrFormats.td - Sparc Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "SP";
+
+ bits<2> op;
+ let Inst{31-30} = op; // Top two bits are the 'op' field
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #2 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+// Format 2 instructions
+class F2<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<3> op2;
+ bits<22> imm22;
+ let op = 0; // op = 0
+ let Inst{24-22} = op2;
+ let Inst{21-0} = imm22;
+}
+
+// Specific F2 classes: SparcV8 manual, page 44
+//
+class F2_1<bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : F2<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+
+ let op2 = op2Val;
+
+ let Inst{29-25} = rd;
+}
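
To make the field layout concrete, here is a small stand-alone sketch (not part of the patch) that packs one F2_1-format word by hand, using the sethi op2 value (0b100, as used by SETHIi later in this file) and sample operand values:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t op    = 0;                  // format 2: bits 31-30 are 00
      uint32_t rd    = 8;                  // %o0 is integer register 8
      uint32_t op2   = 0x4;                // sethi
      uint32_t imm22 = 0xDEADBEEFu >> 10;  // upper 22 bits of a sample value
      uint32_t Word  = (op << 30) | (rd << 25) | (op2 << 22) | (imm22 & 0x3FFFFF);
      assert(((Word >> 22) & 0x7) == 0x4 && ((Word >> 25) & 0x1F) == 8);
      return 0;
    }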
+
+class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : F2<outs, ins, asmstr, pattern> {
+ bits<4> cond;
+ bit annul = 0; // currently unused
+
+ let cond = condVal;
+ let op2 = op2Val;
+
+ let Inst{29} = annul;
+ let Inst{28-25} = cond;
+}
+
+//===----------------------------------------------------------------------===//
+// Format #3 instruction classes in the Sparc
+//===----------------------------------------------------------------------===//
+
+class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+ bits<6> op3;
+ bits<5> rs1;
+ let op{1} = 1; // Op = 2 or 3
+ let Inst{29-25} = rd;
+ let Inst{24-19} = op3;
+ let Inst{18-14} = rs1;
+}
+
+// Specific F3 classes: SparcV8 manual, page 44
+//
+class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<8> asi = 0; // asi not currently used
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 0; // i field = 0
+ let Inst{12-5} = asi; // address space identifier
+ let Inst{4-0} = rs2;
+}
+
+class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<13> simm13;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13} = 1; // i field = 1
+ let Inst{12-0} = simm13;
+}
+
+// floating-point
+class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+
+ let Inst{13-5} = opfval; // fp opcode
+ let Inst{4-0} = rs2;
+}
+
+
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
new file mode 100644
index 000000000000..d2f6b9bdcb3d
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -0,0 +1,277 @@
+//===- SparcInstrInfo.cpp - Sparc Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcInstrInfo.h"
+#include "SparcSubtarget.h"
+#include "Sparc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "SparcGenInstrInfo.inc"
+using namespace llvm;
+
+SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
+ : TargetInstrInfoImpl(SparcInsts, array_lengthof(SparcInsts)),
+ RI(ST, *this), Subtarget(ST) {
+}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// Return true if the instruction is a register to register move and
+/// leave the source and dest operands in the passed parameters.
+///
+bool SparcInstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSR, unsigned &DstSR) const {
+ SrcSR = DstSR = 0; // No sub-registers.
+
+ // We look for 3 kinds of patterns here:
+ // or with G0 or 0
+ // add with G0 or 0
+ // fmovs or FpMOVD (pseudo double move).
+ if (MI.getOpcode() == SP::ORrr || MI.getOpcode() == SP::ADDrr) {
+ if (MI.getOperand(1).getReg() == SP::G0) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ return true;
+ } else if (MI.getOperand(2).getReg() == SP::G0) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ } else if ((MI.getOpcode() == SP::ORri || MI.getOpcode() == SP::ADDri) &&
+ isZeroImm(MI.getOperand(2)) && MI.getOperand(1).isReg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ } else if (MI.getOpcode() == SP::FMOVS || MI.getOpcode() == SP::FpMOVD ||
+ MI.getOpcode() == SP::FMOVD) {
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ }
+ return false;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::LDri ||
+ MI->getOpcode() == SP::LDFri ||
+ MI->getOpcode() == SP::LDDFri) {
+ if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stored stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (MI->getOpcode() == SP::STri ||
+ MI->getOpcode() == SP::STFri ||
+ MI->getOpcode() == SP::STDFri) {
+ if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
+ MI->getOperand(1).getImm() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ }
+ return 0;
+}
+
+unsigned
+SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond)const{
+ // FIXME this should probably take a DebugLoc argument
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Can only insert uncond branches so far.
+ assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
+ BuildMI(&MBB, dl, get(SP::BA)).addMBB(TBB);
+ return 1;
+}
+
+bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ if (DestRC != SrcRC) {
+ // Not yet supported!
+ return false;
+ }
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (DestRC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg);
+ else if (DestRC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg).addReg(SrcReg);
+ else if (DestRC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD),DestReg)
+ .addReg(SrcReg);
+ else
+ // Can't copy this register
+ return false;
+
+ return true;
+}
+
+void SparcInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
+ if (RC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else if (RC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ else
+ assert(0 && "Can't store this register to stack slot");
+}
+
+void SparcInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc = 0;
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (RC == SP::IntRegsRegisterClass)
+ Opc = SP::STri;
+ else if (RC == SP::FPRegsRegisterClass)
+ Opc = SP::STFri;
+ else if (RC == SP::DFPRegsRegisterClass)
+ Opc = SP::STDFri;
+ else
+ assert(0 && "Can't store this register");
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ MIB.addReg(SrcReg, getKillRegState(isKill));
+ NewMIs.push_back(MIB);
+ return;
+}
+
+void SparcInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (RC == SP::IntRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == SP::FPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else if (RC == SP::DFPRegsRegisterClass)
+ BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0);
+ else
+ assert(0 && "Can't load this register from stack slot");
+}
+
+void SparcInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ unsigned Opc = 0;
+ if (RC == SP::IntRegsRegisterClass)
+ Opc = SP::LDri;
+ else if (RC == SP::FPRegsRegisterClass)
+ Opc = SP::LDFri;
+ else if (RC == SP::DFPRegsRegisterClass)
+ Opc = SP::LDDFri;
+ else
+ assert(0 && "Can't load this register");
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+ return;
+}
+
+MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FI) const {
+ if (Ops.size() != 1) return NULL;
+
+ unsigned OpNum = Ops[0];
+ bool isFloat = false;
+ MachineInstr *NewMI = NULL;
+ switch (MI->getOpcode()) {
+ case SP::ORrr:
+ if (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == SP::G0 &&
+ MI->getOperand(0).isReg() && MI->getOperand(2).isReg()) {
+ if (OpNum == 0) // COPY -> STORE
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::STri))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(MI->getOperand(2).getReg());
+ else // COPY -> LOAD
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::LDri),
+ MI->getOperand(0).getReg())
+ .addFrameIndex(FI)
+ .addImm(0);
+ }
+ break;
+ case SP::FMOVS:
+ isFloat = true;
+ // FALLTHROUGH
+ case SP::FMOVD:
+ if (OpNum == 0) { // COPY -> STORE
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ NewMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isFloat ? SP::STFri : SP::STDFri))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill));
+ } else { // COPY -> LOAD
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ NewMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isFloat ? SP::LDFri : SP::LDDFri))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+ .addFrameIndex(FI)
+ .addImm(0);
+ }
+ break;
+ }
+
+ return NewMI;
+}
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
new file mode 100644
index 000000000000..ab661b991d74
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -0,0 +1,114 @@
+//===- SparcInstrInfo.h - Sparc Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCINSTRUCTIONINFO_H
+#define SPARCINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "SparcRegisterInfo.h"
+
+namespace llvm {
+
+/// SPII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace SPII {
+ enum {
+ Pseudo = (1<<0),
+ Load = (1<<1),
+ Store = (1<<2),
+ DelaySlot = (1<<3)
+ };
+}
+
+class SparcInstrInfo : public TargetInstrInfoImpl {
+ const SparcRegisterInfo RI;
+ const SparcSubtarget& Subtarget;
+public:
+ explicit SparcInstrInfo(SparcSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the stored stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+};
+
+}
+
+#endif
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
new file mode 100644
index 000000000000..2d6c9209e6ae
--- /dev/null
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -0,0 +1,769 @@
+//===- SparcInstrInfo.td - Target Description for Sparc Target ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Sparc instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "SparcInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+// HasV9 - This predicate is true when the target processor supports V9
+// instructions. Note that the machine may be running in 32-bit mode.
+def HasV9 : Predicate<"Subtarget.isV9()">;
+
+// HasNoV9 - This predicate is true when the target doesn't have V9
+// instructions. Use of this is just a hack for the isel not having proper
+// costs for V8 instructions that are more expensive than their V9 ones.
+def HasNoV9 : Predicate<"!Subtarget.isV9()">;
+
+// HasVIS - This is true when the target processor has VIS extensions.
+def HasVIS : Predicate<"Subtarget.isVIS()">;
+
+// UseDeprecatedInsts - This predicate is true when the target processor is a
+// V8, or when it is V9 but the V8 deprecated instructions are efficient enough
+// to use when appropriate. In either of these cases, the instruction selector
+// will pick deprecated instructions.
+def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def simm11 : PatLeaf<(imm), [{
+ // simm11 predicate - True if the imm fits in an 11-bit sign extended field.
+ return (((int)N->getZExtValue() << (32-11)) >> (32-11)) ==
+ (int)N->getZExtValue();
+}]>;
+
+def simm13 : PatLeaf<(imm), [{
+ // simm13 predicate - True if the imm fits in a 13-bit sign extended field.
+ return (((int)N->getZExtValue() << (32-13)) >> (32-13)) ==
+ (int)N->getZExtValue();
+}]>;
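
The shift-based test above is equivalent to a signed range check; a minimal C++ sketch of the same predicate (hypothetical helper, illustration only):

    #include <cassert>
    #include <cstdint>

    // True if Imm fits in an N-bit sign-extended immediate field
    // (N = 11 for simm11, N = 13 for simm13).
    static bool fitsSignedImm(int64_t Imm, unsigned N) {
      int64_t Lo = -(int64_t(1) << (N - 1));
      int64_t Hi =  (int64_t(1) << (N - 1)) - 1;
      return Imm >= Lo && Imm <= Hi;
    }

    int main() {
      assert(fitsSignedImm(4095, 13));    // largest positive simm13
      assert(!fitsSignedImm(4096, 13));   // needs 14 bits
      assert(fitsSignedImm(-4096, 13));   // smallest negative simm13
      return 0;
    }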
+
+def LO10 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023,
+ MVT::i32);
+}]>;
+
+def HI22 : SDNodeXForm<imm, [{
+ // Transformation function: extract the top 22 bits of the value by shifting
+ // it right by 10 bits.
+ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 10, MVT::i32);
+}]>;
+
+def SETHIimm : PatLeaf<(imm), [{
+ return (((unsigned)N->getZExtValue() >> 10) << 10) ==
+ (unsigned)N->getZExtValue();
+}], HI22>;
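
HI22 and LO10 split a 32-bit constant between a sethi (which supplies bits 31-10) and the 10-bit immediate of a following or; a quick sketch of the same arithmetic (illustration only):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Value = 0xDEADBEEF;      // arbitrary sample constant
      uint32_t Hi22  = Value >> 10;     // immediate for sethi
      uint32_t Lo10  = Value & 1023;    // immediate for the or
      assert(((Hi22 << 10) | Lo10) == Value);
      return 0;
    }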
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+
+// Address operands
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops IntRegs, i32imm);
+}
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+// Operand for printing out a condition code.
+let PrintMethod = "printCCOperand" in
+ def CCOp : Operand<i32>;
+
+def SDTSPcmpfcc :
+SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
+def SDTSPbrcc :
+SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+def SDTSPselectcc :
+SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>]>;
+def SDTSPFTOI :
+SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDTSPITOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+
+def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutFlag]>;
+def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutFlag]>;
+def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
+def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
+
+def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
+def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
+
+def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>;
+def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>;
+
+def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInFlag]>;
+def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInFlag]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def call : SDNode<"SPISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def retflag : SDNode<"SPISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+//===----------------------------------------------------------------------===//
+// SPARC Flag Conditions
+//===----------------------------------------------------------------------===//
+
+// Note that these values must be kept in sync with the SPCC::CondCodes enum
+// values.
+class ICC_VAL<int N> : PatLeaf<(i32 N)>;
+def ICC_NE : ICC_VAL< 9>; // Not Equal
+def ICC_E : ICC_VAL< 1>; // Equal
+def ICC_G : ICC_VAL<10>; // Greater
+def ICC_LE : ICC_VAL< 2>; // Less or Equal
+def ICC_GE : ICC_VAL<11>; // Greater or Equal
+def ICC_L : ICC_VAL< 3>; // Less
+def ICC_GU : ICC_VAL<12>; // Greater Unsigned
+def ICC_LEU : ICC_VAL< 4>; // Less or Equal Unsigned
+def ICC_CC : ICC_VAL<13>; // Carry Clear/Greater or Equal Unsigned
+def ICC_CS : ICC_VAL< 5>; // Carry Set/Less Unsigned
+def ICC_POS : ICC_VAL<14>; // Positive
+def ICC_NEG : ICC_VAL< 6>; // Negative
+def ICC_VC : ICC_VAL<15>; // Overflow Clear
+def ICC_VS : ICC_VAL< 7>; // Overflow Set
+
+class FCC_VAL<int N> : PatLeaf<(i32 N)>;
+def FCC_U : FCC_VAL<23>; // Unordered
+def FCC_G : FCC_VAL<22>; // Greater
+def FCC_UG : FCC_VAL<21>; // Unordered or Greater
+def FCC_L : FCC_VAL<20>; // Less
+def FCC_UL : FCC_VAL<19>; // Unordered or Less
+def FCC_LG : FCC_VAL<18>; // Less or Greater
+def FCC_NE : FCC_VAL<17>; // Not Equal
+def FCC_E : FCC_VAL<25>; // Equal
+def FCC_UE : FCC_VAL<26>; // Unordered or Equal
+def FCC_GE : FCC_VAL<27>; // Greater or Equal
+def FCC_UGE : FCC_VAL<28>; // Unordered or Greater or Equal
+def FCC_LE : FCC_VAL<29>; // Less or Equal
+def FCC_ULE : FCC_VAL<30>; // Unordered or Less or Equal
+def FCC_O : FCC_VAL<31>; // Ordered
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+/// F3_12 multiclass - Define a normal F3_1/F3_2 pattern in one shot.
+multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+ def rr : F3_1<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : F3_2<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+}
+
+/// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no
+/// pattern.
+multiclass F3_12np<string OpcStr, bits<6> Op3Val> {
+ def rr : F3_1<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+ def ri : F3_2<2, Op3Val,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $b, $c, $dst"), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo instructions.
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern>;
+
+let Defs = [O6], Uses = [O6] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "!ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "!ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+// FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the
+// fpmover pass.
+let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
+ def FpMOVD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpMOVD $src, $dst", []>;
+ def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpNEGD $src, $dst",
+ [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "!FpABSD $src, $dst",
+ [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+}
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
+// scheduler into a branch sequence. This has to handle all permutations of
+// selection between i32/f32/f64 on ICC and FCC.
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+ def SELECT_CC_Int_ICC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_ICC PSEUDO!",
+ [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_Int_FCC
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_Int_FCC PSEUDO!",
+ [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_FP_ICC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_ICC PSEUDO!",
+ [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_FP_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_FP_FCC PSEUDO!",
+ [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_DFP_ICC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_ICC PSEUDO!",
+ [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
+ imm:$Cond))]>;
+ def SELECT_CC_DFP_FCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_DFP_FCC PSEUDO!",
+ [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F,
+ imm:$Cond))]>;
+}
+
+
+// Section A.3 - Synthetic Instructions, p. 85
+// special cases of JMPL:
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1 in {
+ let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in
+ def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>;
+}
+
+// Section B.1 - Load Integer Instructions, p. 90
+def LDSBrr : F3_1<3, 0b001001,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldsb [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>;
+def LDSBri : F3_2<3, 0b001001,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldsb [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>;
+def LDSHrr : F3_1<3, 0b001010,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldsh [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>;
+def LDSHri : F3_2<3, 0b001010,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldsh [$addr], $dst",
+ [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>;
+def LDUBrr : F3_1<3, 0b000001,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldub [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>;
+def LDUBri : F3_2<3, 0b000001,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldub [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>;
+def LDUHrr : F3_1<3, 0b000010,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "lduh [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>;
+def LDUHri : F3_2<3, 0b000010,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "lduh [$addr], $dst",
+ [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>;
+def LDrr : F3_1<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+def LDri : F3_2<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+
+// Section B.2 - Load Floating-point Instructions, p. 92
+def LDFrr : F3_1<3, 0b100000,
+ (outs FPRegs:$dst), (ins MEMrr:$addr),
+ "ld [$addr], $dst",
+ [(set FPRegs:$dst, (load ADDRrr:$addr))]>;
+def LDFri : F3_2<3, 0b100000,
+ (outs FPRegs:$dst), (ins MEMri:$addr),
+ "ld [$addr], $dst",
+ [(set FPRegs:$dst, (load ADDRri:$addr))]>;
+def LDDFrr : F3_1<3, 0b100011,
+ (outs DFPRegs:$dst), (ins MEMrr:$addr),
+ "ldd [$addr], $dst",
+ [(set DFPRegs:$dst, (load ADDRrr:$addr))]>;
+def LDDFri : F3_2<3, 0b100011,
+ (outs DFPRegs:$dst), (ins MEMri:$addr),
+ "ldd [$addr], $dst",
+ [(set DFPRegs:$dst, (load ADDRri:$addr))]>;
+
+// Section B.4 - Store Integer Instructions, p. 95
+def STBrr : F3_1<3, 0b000101,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>;
+def STBri : F3_2<3, 0b000101,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "stb $src, [$addr]",
+ [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>;
+def STHrr : F3_1<3, 0b000110,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>;
+def STHri : F3_2<3, 0b000110,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "sth $src, [$addr]",
+ [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>;
+def STrr : F3_1<3, 0b000100,
+ (outs), (ins MEMrr:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store IntRegs:$src, ADDRrr:$addr)]>;
+def STri : F3_2<3, 0b000100,
+ (outs), (ins MEMri:$addr, IntRegs:$src),
+ "st $src, [$addr]",
+ [(store IntRegs:$src, ADDRri:$addr)]>;
+
+// Section B.5 - Store Floating-point Instructions, p. 97
+def STFrr : F3_1<3, 0b100100,
+ (outs), (ins MEMrr:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store FPRegs:$src, ADDRrr:$addr)]>;
+def STFri : F3_2<3, 0b100100,
+ (outs), (ins MEMri:$addr, FPRegs:$src),
+ "st $src, [$addr]",
+ [(store FPRegs:$src, ADDRri:$addr)]>;
+def STDFrr : F3_1<3, 0b100111,
+ (outs), (ins MEMrr:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store DFPRegs:$src, ADDRrr:$addr)]>;
+def STDFri : F3_2<3, 0b100111,
+ (outs), (ins MEMri:$addr, DFPRegs:$src),
+ "std $src, [$addr]",
+ [(store DFPRegs:$src, ADDRri:$addr)]>;
+
+// Section B.9 - SETHI Instruction, p. 104
+def SETHIi: F2_1<0b100,
+ (outs IntRegs:$dst), (ins i32imm:$src),
+ "sethi $src, $dst",
+ [(set IntRegs:$dst, SETHIimm:$src)]>;
+
+// Section B.10 - NOP Instruction, p. 105
+// (It's a special case of SETHI)
+let rd = 0, imm22 = 0 in
+ def NOP : F2_1<0b100, (outs), (ins), "nop", []>;
+
+// Section B.11 - Logical Instructions, p. 106
+defm AND : F3_12<"and", 0b000001, and>;
+
+def ANDNrr : F3_1<2, 0b000101,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "andn $b, $c, $dst",
+ [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>;
+def ANDNri : F3_2<2, 0b000101,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "andn $b, $c, $dst", []>;
+
+defm OR : F3_12<"or", 0b000010, or>;
+
+def ORNrr : F3_1<2, 0b000110,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "orn $b, $c, $dst",
+ [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>;
+def ORNri : F3_2<2, 0b000110,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "orn $b, $c, $dst", []>;
+defm XOR : F3_12<"xor", 0b000011, xor>;
+
+def XNORrr : F3_1<2, 0b000111,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "xnor $b, $c, $dst",
+ [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+def XNORri : F3_2<2, 0b000111,
+ (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+ "xnor $b, $c, $dst", []>;
+
+// Section B.12 - Shift Instructions, p. 107
+defm SLL : F3_12<"sll", 0b100101, shl>;
+defm SRL : F3_12<"srl", 0b100110, srl>;
+defm SRA : F3_12<"sra", 0b100111, sra>;
+
+// Section B.13 - Add Instructions, p. 108
+defm ADD : F3_12<"add", 0b000000, add>;
+
+// "LEA" forms of add (patterns to make tblgen happy)
+def LEA_ADDri : F3_2<2, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "add ${addr:arith}, $dst",
+ [(set IntRegs:$dst, ADDRri:$addr)]>;
+
+defm ADDCC : F3_12<"addcc", 0b010000, addc>;
+defm ADDX : F3_12<"addx", 0b001000, adde>;
+
+// Section B.15 - Subtract Instructions, p. 110
+defm SUB : F3_12 <"sub" , 0b000100, sub>;
+defm SUBX : F3_12 <"subx" , 0b001100, sube>;
+defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>;
+
+def SUBXCCrr: F3_1<2, 0b011100,
+ (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ "subxcc $b, $c, $dst", []>;
+
+// Section B.18 - Multiply Instructions, p. 113
+defm UMUL : F3_12np<"umul", 0b001010>;
+defm SMUL : F3_12 <"smul", 0b001011, mul>;
+
+
+// Section B.19 - Divide Instructions, p. 115
+defm UDIV : F3_12np<"udiv", 0b001110>;
+defm SDIV : F3_12np<"sdiv", 0b001111>;
+
+// Section B.20 - SAVE and RESTORE, p. 117
+defm SAVE : F3_12np<"save" , 0b111100>;
+defm RESTORE : F3_12np<"restore", 0b111101>;
+
+// Section B.21 - Branch on Integer Condition Codes Instructions, p. 119
+
+// conditional branch class:
+class BranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b010, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+let isBarrier = 1 in
+ def BA : BranchSP<0b1000, (ins brtarget:$dst),
+ "ba $dst",
+ [(br bb:$dst)]>;
+
+// FIXME: the encoding for the JIT should look at the condition field.
+def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "b$cc $dst",
+ [(SPbricc bb:$dst, imm:$cc)]>;
+
+
+// Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121
+
+// floating-point conditional branch class:
+class FPBranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
+ : F2_2<cc, 0b110, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+}
+
+// FIXME: the encoding for the JIT should look at the condition field.
+def FBCOND : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc),
+ "fb$cc $dst",
+ [(SPbrfcc bb:$dst, imm:$cc)]>;
+
+
+// Section B.24 - Call and Link Instruction, p. 125
+// This is the only Format 1 instruction
+let Uses = [O0, O1, O2, O3, O4, O5],
+ hasDelaySlot = 1, isCall = 1,
+ Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
+ D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15] in {
+ def CALL : InstSP<(outs), (ins calltarget:$dst),
+ "call $dst", []> {
+ bits<30> disp;
+ let op = 1;
+ let Inst{29-0} = disp;
+ }
+
+ // indirect calls
+ def JMPLrr : F3_1<2, 0b111000,
+ (outs), (ins MEMrr:$ptr),
+ "call $ptr",
+ [(call ADDRrr:$ptr)]>;
+ def JMPLri : F3_2<2, 0b111000,
+ (outs), (ins MEMri:$ptr),
+ "call $ptr",
+ [(call ADDRri:$ptr)]>;
+}
+
+// Section B.28 - Read State Register Instructions
+def RDY : F3_1<2, 0b101000,
+ (outs IntRegs:$dst), (ins),
+ "rd %y, $dst", []>;
+
+// Section B.29 - Write State Register Instructions
+def WRYrr : F3_1<2, 0b110000,
+ (outs), (ins IntRegs:$b, IntRegs:$c),
+ "wr $b, $c, %y", []>;
+def WRYri : F3_2<2, 0b110000,
+ (outs), (ins IntRegs:$b, i32imm:$c),
+ "wr $b, $c, %y", []>;
+
+// Convert Integer to Floating-point Instructions, p. 141
+def FITOS : F3_3<2, 0b110100, 0b011000100,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fitos $src, $dst",
+ [(set FPRegs:$dst, (SPitof FPRegs:$src))]>;
+def FITOD : F3_3<2, 0b110100, 0b011001000,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fitod $src, $dst",
+ [(set DFPRegs:$dst, (SPitof FPRegs:$src))]>;
+
+// Convert Floating-point to Integer Instructions, p. 142
+def FSTOI : F3_3<2, 0b110100, 0b011010001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fstoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi FPRegs:$src))]>;
+def FDTOI : F3_3<2, 0b110100, 0b011010010,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fdtoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>;
+
+// Convert between Floating-point Formats Instructions, p. 143
+def FSTOD : F3_3<2, 0b110100, 0b011001001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fstod $src, $dst",
+ [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+def FDTOS : F3_3<2, 0b110100, 0b011000110,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fdtos $src, $dst",
+ [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+
+// Floating-point Move Instructions, p. 144
+def FMOVS : F3_3<2, 0b110100, 0b000000001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fmovs $src, $dst", []>;
+def FNEGS : F3_3<2, 0b110100, 0b000000101,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fnegs $src, $dst",
+ [(set FPRegs:$dst, (fneg FPRegs:$src))]>;
+def FABSS : F3_3<2, 0b110100, 0b000001001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fabss $src, $dst",
+ [(set FPRegs:$dst, (fabs FPRegs:$src))]>;
+
+
+// Floating-point Square Root Instructions, p.145
+def FSQRTS : F3_3<2, 0b110100, 0b000101001,
+ (outs FPRegs:$dst), (ins FPRegs:$src),
+ "fsqrts $src, $dst",
+ [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>;
+def FSQRTD : F3_3<2, 0b110100, 0b000101010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fsqrtd $src, $dst",
+ [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>;
+
+
+
+// Floating-point Add and Subtract Instructions, p. 146
+def FADDS : F3_3<2, 0b110100, 0b001000001,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fadds $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>;
+def FADDD : F3_3<2, 0b110100, 0b001000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "faddd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>;
+def FSUBS : F3_3<2, 0b110100, 0b001000101,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fsubs $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>;
+def FSUBD : F3_3<2, 0b110100, 0b001000110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fsubd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>;
+
+// Floating-point Multiply and Divide Instructions, p. 147
+def FMULS : F3_3<2, 0b110100, 0b001001001,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fmuls $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>;
+def FMULD : F3_3<2, 0b110100, 0b001001010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fmuld $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>;
+def FSMULD : F3_3<2, 0b110100, 0b001101001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fsmuld $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1),
+ (fextend FPRegs:$src2)))]>;
+def FDIVS : F3_3<2, 0b110100, 0b001001101,
+ (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
+ "fdivs $src1, $src2, $dst",
+ [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>;
+def FDIVD : F3_3<2, 0b110100, 0b001001110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fdivd $src1, $src2, $dst",
+ [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>;
+
+// Floating-point Compare Instructions, p. 148
+// Note: the second template argument is different for these instructions.
+// Note 2: the result of an FCMP is not available until the 2nd cycle
+// after the instruction is retired, but there is no interlock. This behavior
+// is modeled with a forced nop after the instruction.
+def FCMPS : F3_3<2, 0b110101, 0b001010001,
+ (outs), (ins FPRegs:$src1, FPRegs:$src2),
+ "fcmps $src1, $src2\n\tnop",
+ [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>;
+def FCMPD : F3_3<2, 0b110101, 0b001010010,
+ (outs), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fcmpd $src1, $src2\n\tnop",
+ [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>;
+
+
+//===----------------------------------------------------------------------===//
+// V9 Instructions
+//===----------------------------------------------------------------------===//
+
+// V9 Conditional Moves.
+let Predicates = [HasV9], isTwoAddress = 1 in {
+ // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
+ // FIXME: Add instruction encodings for the JIT some day.
+ def MOVICCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVICCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %icc, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+
+ def MOVFCCrr
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+ def MOVFCCri
+ : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+ "mov$cc %fcc0, $F, $dst",
+ [(set IntRegs:$dst,
+ (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+
+ def FMOVS_ICC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %icc, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_ICC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %icc, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+ def FMOVS_FCC
+ : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+ "fmovs$cc %fcc0, $F, $dst",
+ [(set FPRegs:$dst,
+ (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+ def FMOVD_FCC
+ : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+ "fmovd$cc %fcc0, $F, $dst",
+ [(set DFPRegs:$dst,
+ (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+
+}
+
+// Floating-Point Move Instructions, p. 164 of the V9 manual.
+let Predicates = [HasV9] in {
+ def FMOVD : F3_3<2, 0b110100, 0b000000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fmovd $src, $dst", []>;
+ def FNEGD : F3_3<2, 0b110100, 0b000000110,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fnegd $src, $dst",
+ [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>;
+ def FABSD : F3_3<2, 0b110100, 0b000001010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fabsd $src, $dst",
+ [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>;
+}
+
+// POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear
+// the top 32 bits before using it. To do this clearing, we use an SLLri X, 0.
+def POPCrr : F3_1<2, 0b101110,
+ (outs IntRegs:$dst), (ins IntRegs:$src),
+ "popc $src, $dst", []>, Requires<[HasV9]>;
+def : Pat<(ctpop IntRegs:$src),
+ (POPCrr (SLLri IntRegs:$src, 0))>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Small immediates.
+def : Pat<(i32 simm13:$val),
+ (ORri G0, imm:$val)>;
+// Arbitrary immediates.
+def : Pat<(i32 imm:$val),
+ (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>;
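+// Illustrative example (not from the original source, assuming HI22/LO10
+// extract the top 22 and low 10 bits of the value): materializing 0x12345678
+// with the pattern above expands to roughly
+//   sethi %hi(0x12345678), %reg   ! %reg = 0x48D15 << 10 = 0x12345400
+//   or    %reg, 0x278, %reg       ! 0x12345400 | 0x278   = 0x12345678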
+
+// subc
+def : Pat<(subc IntRegs:$b, IntRegs:$c),
+ (SUBCCrr IntRegs:$b, IntRegs:$c)>;
+def : Pat<(subc IntRegs:$b, simm13:$val),
+ (SUBCCri IntRegs:$b, imm:$val)>;
+
+// Global addresses, constant pool entries
+def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORri G0, tglobaladdr:$in)>;
+def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>;
+
+// Add reg, lo. This is used when taking the addr of a global/constpool entry.
+def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)),
+ (ADDri IntRegs:$r, tglobaladdr:$in)>;
+def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)),
+ (ADDri IntRegs:$r, tconstpool:$in)>;
+
+// Calls:
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>;
+
+def : Pat<(ret), (RETL)>;
+
+// Map integer extload's to zextloads.
+def : Pat<(i32 (extloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi8 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi8 ADDRri:$src)), (LDUBri ADDRri:$src)>;
+def : Pat<(i32 (extloadi16 ADDRrr:$src)), (LDUHrr ADDRrr:$src)>;
+def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>;
+
+// zextload bool -> zextload byte
+def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>;
+def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
new file mode 100644
index 000000000000..59efb19ab9c5
--- /dev/null
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -0,0 +1,196 @@
+//===- SparcRegisterInfo.cpp - SPARC Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SPARC implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sparc.h"
+#include "SparcRegisterInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
+ const TargetInstrInfo &tii)
+ : SparcGenRegisterInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
+ Subtarget(st), TII(tii) {
+}
+
+const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs;
+}
+
+BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(SP::G2);
+ Reserved.set(SP::G3);
+ Reserved.set(SP::G4);
+ Reserved.set(SP::O6);
+ Reserved.set(SP::I6);
+ Reserved.set(SP::I7);
+ Reserved.set(SP::G0);
+ Reserved.set(SP::G5);
+ Reserved.set(SP::G6);
+ Reserved.set(SP::G7);
+ return Reserved;
+}
+
+
+const TargetRegisterClass* const*
+SparcRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+ return CalleeSavedRegClasses;
+}
+
+bool SparcRegisterInfo::hasFP(const MachineFunction &MF) const {
+ return false;
+}
+
+void SparcRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+ DebugLoc dl = MI.getDebugLoc();
+ int Size = MI.getOperand(0).getImm();
+ if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+ Size = -Size;
+ if (Size)
+ BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+ MBB.erase(I);
+}
+
+void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ DebugLoc dl = MI.getDebugLoc();
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Addressable stack objects are accessed using negative offsets from %fp.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MI.getOperand(i+1).getImm();
+
+ // Replace frame index with a frame pointer reference.
+ if (Offset >= -4096 && Offset <= 4095) {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(i).ChangeToRegister(SP::I6, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ } else {
+ // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
+ // scavenge a register here instead of reserving G1 all of the time.
+ unsigned OffHi = (unsigned)Offset >> 10U;
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // Emit G1 = G1 + I6
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
+ .addReg(SP::I6);
+ // Insert G1 + %lo(offset) into the user instruction.
+ MI.getOperand(i).ChangeToRegister(SP::G1, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
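+ // Worked example (illustrative): for an offset of 0x12345, which does not
+ // fit in the 13-bit signed immediate, OffHi is 0x12345 >> 10 = 0x48, so we
+ // emit roughly
+ //   sethi 0x48, %g1        ! %g1 = 0x48 << 10 = 0x12000
+ //   add   %g1, %i6, %g1    ! %i6 is the frame pointer
+ // and the user instruction then addresses [%g1 + 0x345].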
+ }
+}
+
+void SparcRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
+
+void SparcRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+ // Get the number of bytes to allocate from the FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ // Emit the correct save instruction based on the number of bytes in
+ // the frame. Minimum stack frame size according to V8 ABI is:
+ // 16 words for register window spill
+ // 1 word for address of returned aggregate-value
+ // + 6 words for passing parameters on the stack
+ // ----------
+ // 23 words * 4 bytes per word = 92 bytes
+ NumBytes += 92;
+
+ // Round up to the next doubleword boundary -- a doubleword boundary
+ // is required by the ABI.
+ NumBytes = (NumBytes + 7) & ~7;
+ NumBytes = -NumBytes;
+
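+ // Worked example (illustrative): if the frame objects need 40 bytes, then
+ // NumBytes = 40 + 92 = 132, rounded up to 136 and negated to -136, so the
+ // small-offset path below emits roughly "save %o6, -136, %o6".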
+ if (NumBytes >= -4096) {
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
+ .addReg(SP::O6).addImm(NumBytes);
+ } else {
+ // Emit this the hard way. This clobbers G1 which we always know is
+ // available here.
+ unsigned OffHi = (unsigned)NumBytes >> 10U;
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+ // Emit G1 = G1 | %lo(NumBytes) -- OR in the low 10 bits of the frame size.
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+ .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
+ .addReg(SP::O6).addReg(SP::G1);
+ }
+}
+
+void SparcRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ DebugLoc dl = MBBI->getDebugLoc();
+ assert(MBBI->getOpcode() == SP::RETL &&
+ "Can only put epilog before 'retl' instruction!");
+ BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
+ .addReg(SP::G0);
+}
+
+unsigned SparcRegisterInfo::getRARegister() const {
+ assert(0 && "What is the return address register");
+ return 0;
+}
+
+unsigned SparcRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ assert(0 && "What is the frame register");
+ return SP::G1;
+}
+
+unsigned SparcRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned SparcRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ assert(0 && "What is the dwarf register number");
+ return -1;
+}
+
+#include "SparcGenRegisterInfo.inc"
+
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
new file mode 100644
index 000000000000..fc863f3b28f0
--- /dev/null
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -0,0 +1,67 @@
+//===- SparcRegisterInfo.h - Sparc Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCREGISTERINFO_H
+#define SPARCREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "SparcGenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class SparcSubtarget;
+class TargetInstrInfo;
+class Type;
+
+struct SparcRegisterInfo : public SparcGenRegisterInfo {
+ SparcSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
new file mode 100644
index 000000000000..e3a50ca42bbb
--- /dev/null
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -0,0 +1,158 @@
+//===- SparcRegisterInfo.td - Sparc Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Sparc register file
+//===----------------------------------------------------------------------===//
+
+class SparcReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "SP";
+}
+
+// Registers are identified with 5-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rf - 32-bit floating-point registers
+class Rf<bits<5> num, string n> : SparcReg<n> {
+ let Num = num;
+}
+// Rd - Slots in the FP register file for 64-bit floating-point values.
+class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
+ let Num = num;
+ let SubRegs = subregs;
+}
+
+// Integer registers
+def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
+def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
+def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>;
+def G3 : Ri< 3, "G3">, DwarfRegNum<[3]>;
+def G4 : Ri< 4, "G4">, DwarfRegNum<[4]>;
+def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>;
+def G6 : Ri< 6, "G6">, DwarfRegNum<[6]>;
+def G7 : Ri< 7, "G7">, DwarfRegNum<[7]>;
+def O0 : Ri< 8, "O0">, DwarfRegNum<[8]>;
+def O1 : Ri< 9, "O1">, DwarfRegNum<[9]>;
+def O2 : Ri<10, "O2">, DwarfRegNum<[10]>;
+def O3 : Ri<11, "O3">, DwarfRegNum<[11]>;
+def O4 : Ri<12, "O4">, DwarfRegNum<[12]>;
+def O5 : Ri<13, "O5">, DwarfRegNum<[13]>;
+def O6 : Ri<14, "O6">, DwarfRegNum<[14]>;
+def O7 : Ri<15, "O7">, DwarfRegNum<[15]>;
+def L0 : Ri<16, "L0">, DwarfRegNum<[16]>;
+def L1 : Ri<17, "L1">, DwarfRegNum<[17]>;
+def L2 : Ri<18, "L2">, DwarfRegNum<[18]>;
+def L3 : Ri<19, "L3">, DwarfRegNum<[19]>;
+def L4 : Ri<20, "L4">, DwarfRegNum<[20]>;
+def L5 : Ri<21, "L5">, DwarfRegNum<[21]>;
+def L6 : Ri<22, "L6">, DwarfRegNum<[22]>;
+def L7 : Ri<23, "L7">, DwarfRegNum<[23]>;
+def I0 : Ri<24, "I0">, DwarfRegNum<[24]>;
+def I1 : Ri<25, "I1">, DwarfRegNum<[25]>;
+def I2 : Ri<26, "I2">, DwarfRegNum<[26]>;
+def I3 : Ri<27, "I3">, DwarfRegNum<[27]>;
+def I4 : Ri<28, "I4">, DwarfRegNum<[28]>;
+def I5 : Ri<29, "I5">, DwarfRegNum<[29]>;
+def I6 : Ri<30, "I6">, DwarfRegNum<[30]>;
+def I7 : Ri<31, "I7">, DwarfRegNum<[31]>;
+
+// Floating-point registers
+def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>;
+def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>;
+def F2 : Rf< 2, "F2">, DwarfRegNum<[34]>;
+def F3 : Rf< 3, "F3">, DwarfRegNum<[35]>;
+def F4 : Rf< 4, "F4">, DwarfRegNum<[36]>;
+def F5 : Rf< 5, "F5">, DwarfRegNum<[37]>;
+def F6 : Rf< 6, "F6">, DwarfRegNum<[38]>;
+def F7 : Rf< 7, "F7">, DwarfRegNum<[39]>;
+def F8 : Rf< 8, "F8">, DwarfRegNum<[40]>;
+def F9 : Rf< 9, "F9">, DwarfRegNum<[41]>;
+def F10 : Rf<10, "F10">, DwarfRegNum<[42]>;
+def F11 : Rf<11, "F11">, DwarfRegNum<[43]>;
+def F12 : Rf<12, "F12">, DwarfRegNum<[44]>;
+def F13 : Rf<13, "F13">, DwarfRegNum<[45]>;
+def F14 : Rf<14, "F14">, DwarfRegNum<[46]>;
+def F15 : Rf<15, "F15">, DwarfRegNum<[47]>;
+def F16 : Rf<16, "F16">, DwarfRegNum<[48]>;
+def F17 : Rf<17, "F17">, DwarfRegNum<[49]>;
+def F18 : Rf<18, "F18">, DwarfRegNum<[50]>;
+def F19 : Rf<19, "F19">, DwarfRegNum<[51]>;
+def F20 : Rf<20, "F20">, DwarfRegNum<[52]>;
+def F21 : Rf<21, "F21">, DwarfRegNum<[53]>;
+def F22 : Rf<22, "F22">, DwarfRegNum<[54]>;
+def F23 : Rf<23, "F23">, DwarfRegNum<[55]>;
+def F24 : Rf<24, "F24">, DwarfRegNum<[56]>;
+def F25 : Rf<25, "F25">, DwarfRegNum<[57]>;
+def F26 : Rf<26, "F26">, DwarfRegNum<[58]>;
+def F27 : Rf<27, "F27">, DwarfRegNum<[59]>;
+def F28 : Rf<28, "F28">, DwarfRegNum<[60]>;
+def F29 : Rf<29, "F29">, DwarfRegNum<[61]>;
+def F30 : Rf<30, "F30">, DwarfRegNum<[62]>;
+def F31 : Rf<31, "F31">, DwarfRegNum<[63]>;
+
+// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>;
+def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[34]>;
+def D2 : Rd< 4, "F4", [F4, F5]>, DwarfRegNum<[36]>;
+def D3 : Rd< 6, "F6", [F6, F7]>, DwarfRegNum<[38]>;
+def D4 : Rd< 8, "F8", [F8, F9]>, DwarfRegNum<[40]>;
+def D5 : Rd<10, "F10", [F10, F11]>, DwarfRegNum<[42]>;
+def D6 : Rd<12, "F12", [F12, F13]>, DwarfRegNum<[44]>;
+def D7 : Rd<14, "F14", [F14, F15]>, DwarfRegNum<[46]>;
+def D8 : Rd<16, "F16", [F16, F17]>, DwarfRegNum<[48]>;
+def D9 : Rd<18, "F18", [F18, F19]>, DwarfRegNum<[50]>;
+def D10 : Rd<20, "F20", [F20, F21]>, DwarfRegNum<[52]>;
+def D11 : Rd<22, "F22", [F22, F23]>, DwarfRegNum<[54]>;
+def D12 : Rd<24, "F24", [F24, F25]>, DwarfRegNum<[56]>;
+def D13 : Rd<26, "F26", [F26, F27]>, DwarfRegNum<[58]>;
+def D14 : Rd<28, "F28", [F28, F29]>, DwarfRegNum<[60]>;
+def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[62]>;
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
+def IntRegs : RegisterClass<"SP", [i32], 32, [L0, L1, L2, L3, L4, L5, L6, L7,
+ I0, I1, I2, I3, I4, I5,
+ O0, O1, O2, O3, O4, O5, O7,
+
+ // FIXME: G1 reserved for now for large imm generation by frame code.
+ G1,
+ // Non-allocatable regs:
+ G2, G3, G4, // FIXME: OK for use only in
+ // applications, not libraries.
+ O6, // stack ptr
+ I6, // frame ptr
+ I7, // return address
+ G0, // constant zero
+ G5, G6, G7 // reserved for kernel
+ ]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ IntRegsClass::iterator
+ IntRegsClass::allocation_order_end(const MachineFunction &MF) const {
+ // FIXME: These special regs should be taken out of the regclass!
+ return end()-10 // Don't allocate special registers
+ -1; // FIXME: G1 reserved for large imm generation by frame code.
+ }
+ }];
+}
+
+def FPRegs : RegisterClass<"SP", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7, F8,
+ F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22,
+ F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+
+def DFPRegs : RegisterClass<"SP", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7,
+ D8, D9, D10, D11, D12, D13, D14, D15]>;
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
new file mode 100644
index 000000000000..aaddbff073ad
--- /dev/null
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -0,0 +1,43 @@
+//===- SparcSubtarget.cpp - SPARC Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPARC specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcSubtarget.h"
+#include "SparcGenSubtarget.inc"
+using namespace llvm;
+
+// FIXME: temporary.
+#include "llvm/Support/CommandLine.h"
+namespace {
+ cl::opt<bool> EnableV9("enable-sparc-v9-insts", cl::Hidden,
+ cl::desc("Enable V9 instructions in the V8 target"));
+}
+
+SparcSubtarget::SparcSubtarget(const Module &M, const std::string &FS) {
+ // Set the default features.
+ IsV9 = false;
+ V8DeprecatedInsts = false;
+ IsVIS = false;
+
+ // Determine default and user specified characteristics
+ std::string CPU = "generic";
+
+ // FIXME: autodetect host here!
+ CPU = "v9"; // What is a good way to detect V9?
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+
+ // Unless explicitly enabled, disable the V9 instructions.
+ if (!EnableV9)
+ IsV9 = false;
+}
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
new file mode 100644
index 000000000000..e5a5ba47f106
--- /dev/null
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -0,0 +1,43 @@
+//=====-- SparcSubtarget.h - Define Subtarget for the SPARC ----*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SPARC specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_SUBTARGET_H
+#define SPARC_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include <string>
+
+namespace llvm {
+ class Module;
+
+class SparcSubtarget : public TargetSubtarget {
+ bool IsV9;
+ bool V8DeprecatedInsts;
+ bool IsVIS;
+public:
+ SparcSubtarget(const Module &M, const std::string &FS);
+
+ bool isV9() const { return IsV9; }
+ bool isVIS() const { return IsVIS; }
+ bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; }
+
+ /// ParseSubtargetFeatures - Parses the features string, setting the
+ /// specified subtarget options. The definition of this function is
+ /// auto-generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/SparcTargetAsmInfo.cpp b/lib/Target/Sparc/SparcTargetAsmInfo.cpp
new file mode 100644
index 000000000000..c13d45ceec7c
--- /dev/null
+++ b/lib/Target/Sparc/SparcTargetAsmInfo.cpp
@@ -0,0 +1,50 @@
+//===-- SparcTargetAsmInfo.cpp - Sparc asm properties -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SparcTargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcTargetAsmInfo.h"
+
+using namespace llvm;
+
+SparcELFTargetAsmInfo::SparcELFTargetAsmInfo(const TargetMachine &TM):
+ ELFTargetAsmInfo(TM) {
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0; // .xword is only supported by V9.
+ ZeroDirective = "\t.skip\t";
+ CommentString = "!";
+ ConstantPoolSection = "\t.section \".rodata\",#alloc\n";
+ COMMDirectiveTakesAlignment = true;
+ CStringSection = ".rodata.str";
+
+ // Sparc normally uses a named section for BSS.
+ BSSSection_ = getNamedSection("\t.bss",
+ SectionFlags::Writeable | SectionFlags::BSS,
+ /* Override */ true);
+}
+
+std::string SparcELFTargetAsmInfo::printSectionFlags(unsigned flags) const {
+ if (flags & SectionFlags::Mergeable)
+ return ELFTargetAsmInfo::printSectionFlags(flags);
+
+ std::string Flags;
+ if (!(flags & SectionFlags::Debug))
+ Flags += ",#alloc";
+ if (flags & SectionFlags::Code)
+ Flags += ",#execinstr";
+ if (flags & SectionFlags::Writeable)
+ Flags += ",#write";
+ if (flags & SectionFlags::TLS)
+ Flags += ",#tls";
+
+ return Flags;
+}
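+
+// For example (illustrative): a non-debug section marked Code | Writeable
+// yields ",#alloc,#execinstr,#write", the "#flag" section syntax used by
+// SPARC/Solaris assemblers.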
diff --git a/lib/Target/Sparc/SparcTargetAsmInfo.h b/lib/Target/Sparc/SparcTargetAsmInfo.h
new file mode 100644
index 000000000000..1af5d80b5503
--- /dev/null
+++ b/lib/Target/Sparc/SparcTargetAsmInfo.h
@@ -0,0 +1,33 @@
+//=====-- SparcTargetAsmInfo.h - Sparc asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SparcTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETASMINFO_H
+#define SPARCTARGETASMINFO_H
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declaration.
+ class TargetMachine;
+
+ struct SparcELFTargetAsmInfo : public ELFTargetAsmInfo {
+ explicit SparcELFTargetAsmInfo(const TargetMachine &TM);
+
+ std::string printSectionFlags(unsigned flags) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
new file mode 100644
index 000000000000..eda030924100
--- /dev/null
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -0,0 +1,94 @@
+//===-- SparcTargetMachine.cpp - Define TargetMachine for Sparc -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcTargetAsmInfo.h"
+#include "SparcTargetMachine.h"
+#include "Sparc.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+/// SparcTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int SparcTargetMachineModule;
+int SparcTargetMachineModule = 0;
+
+// Register the target.
+static RegisterTarget<SparcTargetMachine> X("sparc", "SPARC");
+
+const TargetAsmInfo *SparcTargetMachine::createTargetAsmInfo() const {
+ // FIXME: Handle Solaris subtarget someday :)
+ return new SparcELFTargetAsmInfo(*this);
+}
+
+/// SparcTargetMachine ctor - Create an ILP32 architecture model
+///
+SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS)
+ : DataLayout("E-p:32:32-f128:128:128"),
+ Subtarget(M, FS), TLInfo(*this), InstrInfo(Subtarget),
+ FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
+}
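+
+// A note on the layout string above (illustrative reading of the format):
+// "E" selects big-endian byte order, "p:32:32" gives 32-bit pointers with
+// 32-bit alignment, and "f128:128:128" gives f128 values 128-bit ABI and
+// preferred alignment.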
+
+unsigned SparcTargetMachine::getModuleMatchQuality(const Module &M) {
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 6 && std::string(TT.begin(), TT.begin()+6) == "sparc-")
+ return 20;
+
+ // If the target triple is something non-sparc, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::BigEndian &&
+ M.getPointerSize() == Module::Pointer32)
+#ifdef __sparc__
+ return 20; // BE/32 ==> Prefer sparc on sparc
+#else
+ return 5; // BE/32 ==> weak preference for sparc elsewhere
+#endif
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+#if defined(__sparc__)
+ return 10;
+#else
+ return 0;
+#endif
+}
+
+bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createSparcISelDag(*this));
+ return false;
+}
+
+/// addPreEmitPass - This method may be implemented by targets that want to run
+/// passes immediately before machine code is emitted. It should return
+/// true if -print-machineinstrs should print out the code after these passes.
+bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel){
+ PM.add(createSparcFPMoverPass(*this));
+ PM.add(createSparcDelaySlotFillerPass(*this));
+ return true;
+}
+
+bool SparcTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ // Output assembly language.
+ PM.add(createSparcCodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
new file mode 100644
index 000000000000..40b44f2fb34d
--- /dev/null
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -0,0 +1,63 @@
+//===-- SparcTargetMachine.h - Define TargetMachine for Sparc ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Sparc specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETMACHINE_H
+#define SPARCTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "SparcInstrInfo.h"
+#include "SparcSubtarget.h"
+#include "SparcISelLowering.h"
+
+namespace llvm {
+
+class Module;
+
+class SparcTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment
+ SparcSubtarget Subtarget;
+ SparcTargetLowering TLInfo;
+ SparcInstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ SparcTargetMachine(const Module &M, const std::string &FS);
+
+ virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const SparcSubtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual const SparcRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual SparcTargetLowering* getTargetLowering() const {
+ return const_cast<SparcTargetLowering*>(&TLInfo);
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp
new file mode 100644
index 000000000000..f9370256c602
--- /dev/null
+++ b/lib/Target/SubtargetFeature.cpp
@@ -0,0 +1,364 @@
+//===- SubtargetFeature.cpp - CPU characteristics Implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SubtargetFeature interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Streams.h"
+#include <algorithm>
+#include <ostream>
+#include <cassert>
+#include <cctype>
+#include <cstring>  // strlen/strcmp are used below
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Static Helper Functions
+//===----------------------------------------------------------------------===//
+
+/// hasFlag - Determine if a feature string has a '+' or '-' flag prefix.
+///
+static inline bool hasFlag(const std::string &Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' or '-' flag
+ return Ch == '+' || Ch == '-';
+}
+
+/// StripFlag - Return string stripped of flag.
+///
+static inline std::string StripFlag(const std::string &Feature) {
+ return hasFlag(Feature) ? Feature.substr(1) : Feature;
+}
+
+/// isEnabled - Return true if the feature is prefixed with the enable flag '+'.
+///
+static inline bool isEnabled(const std::string &Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' for enabled
+ return Ch == '+';
+}
+
+/// PrependFlag - Return a string with a prepended flag; '+' or '-'.
+///
+static inline std::string PrependFlag(const std::string &Feature,
+ bool IsEnabled) {
+ assert(!Feature.empty() && "Empty string");
+ if (hasFlag(Feature)) return Feature;
+ return std::string(IsEnabled ? "+" : "-") + Feature;
+}
+
+/// Split - Splits a string of comma-separated items into a vector of strings.
+///
+static void Split(std::vector<std::string> &V, const std::string &S) {
+ // Start at beginning of string.
+ size_t Pos = 0;
+ while (true) {
+ // Find the next comma
+ size_t Comma = S.find(',', Pos);
+ // If no comma is found then the rest of the string is used
+ if (Comma == std::string::npos) {
+ // Add string to vector
+ V.push_back(S.substr(Pos));
+ break;
+ }
+ // Otherwise add substring to vector
+ V.push_back(S.substr(Pos, Comma - Pos));
+ // Advance to next item
+ Pos = Comma + 1;
+ }
+}
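+
+// For example (illustrative): Split(V, "v9,+a,-b") appends the three strings
+// "v9", "+a" and "-b" to V.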
+
+/// Join a vector of strings to a string with a comma separating each element.
+///
+static std::string Join(const std::vector<std::string> &V) {
+ // Start with empty string.
+ std::string Result;
+ // If the vector is not empty
+ if (!V.empty()) {
+ // Start with the CPU feature
+ Result = V[0];
+ // For each successive feature
+ for (size_t i = 1; i < V.size(); i++) {
+ // Add a comma
+ Result += ",";
+ // Add the feature
+ Result += V[i];
+ }
+ }
+ // Return the features string
+ return Result;
+}
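+
+// For example (illustrative): Join on the vector {"v9", "+a", "-b"} returns
+// the string "v9,+a,-b".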
+
+/// Adding features.
+void SubtargetFeatures::AddFeature(const std::string &String,
+ bool IsEnabled) {
+ // Don't add empty features
+ if (!String.empty()) {
+ // Convert to lowercase, prepend flag and add to vector
+ Features.push_back(PrependFlag(LowercaseString(String), IsEnabled));
+ }
+}
+
+/// Find KV in array using binary search.
+template<typename T> const T *Find(const std::string &S, const T *A, size_t L) {
+ // Make the lower bound element we're looking for
+ T KV;
+ KV.Key = S.c_str();
+ // Determine the end of the array
+ const T *Hi = A + L;
+ // Binary search the array
+ const T *F = std::lower_bound(A, Hi, KV);
+ // If not found then return NULL
+ if (F == Hi || std::string(F->Key) != S) return NULL;
+ // Return the found array item
+ return F;
+}
+
+/// getLongestEntryLength - Return the length of the longest entry in the table.
+///
+static size_t getLongestEntryLength(const SubtargetFeatureKV *Table,
+ size_t Size) {
+ size_t MaxLen = 0;
+ for (size_t i = 0; i < Size; i++)
+ MaxLen = std::max(MaxLen, std::strlen(Table[i].Key));
+ return MaxLen;
+}
+
+/// Display help for feature choices.
+///
+static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatTable, size_t FeatTableSize) {
+ // Determine the length of the longest CPU and Feature entries.
+ unsigned MaxCPULen = getLongestEntryLength(CPUTable, CPUTableSize);
+ unsigned MaxFeatLen = getLongestEntryLength(FeatTable, FeatTableSize);
+
+ // Print the CPU table.
+ cerr << "Available CPUs for this target:\n\n";
+ for (size_t i = 0; i != CPUTableSize; i++)
+ cerr << " " << CPUTable[i].Key
+ << std::string(MaxCPULen - std::strlen(CPUTable[i].Key), ' ')
+ << " - " << CPUTable[i].Desc << ".\n";
+ cerr << "\n";
+
+ // Print the Feature table.
+ cerr << "Available features for this target:\n\n";
+ for (size_t i = 0; i != FeatTableSize; i++)
+ cerr << " " << FeatTable[i].Key
+ << std::string(MaxFeatLen - std::strlen(FeatTable[i].Key), ' ')
+ << " - " << FeatTable[i].Desc << ".\n";
+ cerr << "\n";
+
+ cerr << "Use +feature to enable a feature, or -feature to disable it.\n"
+ << "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
+ exit(1);
+}
+
+//===----------------------------------------------------------------------===//
+// SubtargetFeatures Implementation
+//===----------------------------------------------------------------------===//
+
+SubtargetFeatures::SubtargetFeatures(const std::string &Initial) {
+ // Break up string into separate features
+ Split(Features, Initial);
+}
+
+
+std::string SubtargetFeatures::getString() const {
+ return Join(Features);
+}
+void SubtargetFeatures::setString(const std::string &Initial) {
+ // Throw out old features
+ Features.clear();
+ // Break up string into separate features
+ Split(Features, LowercaseString(Initial));
+}
+
+
+/// setCPU - Set the CPU string. Replaces previous setting. Setting to ""
+/// clears CPU.
+void SubtargetFeatures::setCPU(const std::string &String) {
+ Features[0] = LowercaseString(String);
+}
+
+
+/// setCPUIfNone - Set the CPU string only if it is not already set.
+///
+void SubtargetFeatures::setCPUIfNone(const std::string &String) {
+ if (Features[0].empty()) setCPU(String);
+}
+
+/// getCPU - Returns current CPU.
+///
+const std::string & SubtargetFeatures::getCPU() const {
+ return Features[0];
+}
+
+
+/// SetImpliedBits - For each feature that is (transitively) implied by this
+/// feature, set it.
+///
+static
+void SetImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+
+ if (FeatureEntry->Value == FE.Value) continue;
+
+ if (FeatureEntry->Implies & FE.Value) {
+ Bits |= FE.Value;
+ SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ }
+}
+
+/// ClearImpliedBits - For each feature that (transitively) implies this
+/// feature, clear it.
+///
+static
+void ClearImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+
+ if (FeatureEntry->Value == FE.Value) continue;
+
+ if (FE.Implies & FeatureEntry->Value) {
+ Bits &= ~FE.Value;
+ ClearImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ }
+}
+
+/// getBits - Get feature bits.
+///
+uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
+ size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ assert(CPUTable && "missing CPU table");
+ assert(FeatureTable && "missing features table");
+#ifndef NDEBUG
+ for (size_t i = 1; i < CPUTableSize; i++) {
+ assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
+ "CPU table is not sorted");
+ }
+ for (size_t i = 1; i < FeatureTableSize; i++) {
+ assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 &&
+ "CPU features table is not sorted");
+ }
+#endif
+ uint32_t Bits = 0; // Resulting bits
+
+ // Check if help is needed
+ if (Features[0] == "help")
+ Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+
+ // Find CPU entry
+ const SubtargetFeatureKV *CPUEntry =
+ Find(Features[0], CPUTable, CPUTableSize);
+ // If there is a match
+ if (CPUEntry) {
+ // Set base feature bits
+ Bits = CPUEntry->Value;
+
+ // Set the features implied by this CPU, if any.
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+ if (CPUEntry->Value & FE.Value)
+ SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ cerr << "'" << Features[0]
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)"
+ << "\n";
+ }
+ // Iterate through each feature
+ for (size_t i = 1; i < Features.size(); i++) {
+ const std::string &Feature = Features[i];
+
+ // Check for help
+ if (Feature == "+help")
+ Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+
+ // Find feature in table.
+ const SubtargetFeatureKV *FeatureEntry =
+ Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+ // If there is a match
+ if (FeatureEntry) {
+ // Enable/disable feature in bits
+ if (isEnabled(Feature)) {
+ Bits |= FeatureEntry->Value;
+
+ // For each feature that this implies, set it.
+ SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ } else {
+ Bits &= ~FeatureEntry->Value;
+
+ // For each feature that implies this, clear it.
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ cerr << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)"
+ << "\n";
+ }
+ }
+
+ return Bits;
+}
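+
+// Example of the overall flow (illustrative, assuming a target defines a "v9"
+// CPU and a "vis" feature): for Features = {"v9", "+vis"}, the "v9" CPU entry
+// seeds Bits and any features its mask implies are folded in; the "+vis"
+// entry then ORs in the VIS bit (a "-vis" entry would instead clear it and
+// everything that implies it).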
+
+/// getInfo - Look up the CPU name in Table and return its associated info
+/// pointer.
+void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table,
+ size_t TableSize) {
+ assert(Table && "missing table");
+#ifndef NDEBUG
+ for (size_t i = 1; i < TableSize; i++) {
+ assert(strcmp(Table[i - 1].Key, Table[i].Key) < 0 && "Table is not sorted");
+ }
+#endif
+
+ // Find entry
+ const SubtargetInfoKV *Entry = Find(Features[0], Table, TableSize);
+
+ if (Entry) {
+ return Entry->Value;
+ } else {
+ cerr << "'" << Features[0]
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)"
+ << "\n";
+ return NULL;
+ }
+}
+
+/// print - Print feature string.
+///
+void SubtargetFeatures::print(std::ostream &OS) const {
+ for (size_t i = 0; i < Features.size(); i++) {
+ OS << Features[i] << " ";
+ }
+ OS << "\n";
+}
+
+/// dump - Dump feature info.
+///
+void SubtargetFeatures::dump() const {
+ print(*cerr.stream());
+}
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
new file mode 100644
index 000000000000..ed544b73eaed
--- /dev/null
+++ b/lib/Target/Target.cpp
@@ -0,0 +1,94 @@
+//===-- Target.cpp --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the C bindings for libLLVMTarget.a, which implements
+// target information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Target.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetData.h"
+#include <cstring>
+
+using namespace llvm;
+
+LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
+ return wrap(new TargetData(StringRep));
+}
+
+void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
+ unwrap(PM)->add(new TargetData(*unwrap(TD)));
+}
+
+char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef TD) {
+ std::string StringRep = unwrap(TD)->getStringRepresentation();
+ return strdup(StringRep.c_str());
+}
+
+LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) {
+ return unwrap(TD)->isLittleEndian();
+}
+
+unsigned LLVMPointerSize(LLVMTargetDataRef TD) {
+ return unwrap(TD)->getPointerSize();
+}
+
+LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) {
+ return wrap(unwrap(TD)->getIntPtrType());
+}
+
+unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeSizeInBits(unwrap(Ty));
+}
+
+unsigned long long LLVMStoreSizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeStoreSize(unwrap(Ty));
+}
+
+unsigned long long LLVMABISizeOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getTypeAllocSize(unwrap(Ty));
+}
+
+unsigned LLVMABIAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getABITypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMCallFrameAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getCallFrameTypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMPreferredAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+ return unwrap(TD)->getPrefTypeAlignment(unwrap(Ty));
+}
+
+unsigned LLVMPreferredAlignmentOfGlobal(LLVMTargetDataRef TD,
+ LLVMValueRef GlobalVar) {
+ return unwrap(TD)->getPreferredAlignment(unwrap<GlobalVariable>(GlobalVar));
+}
+
+unsigned LLVMElementAtOffset(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
+ unsigned long long Offset) {
+ const StructType *STy = unwrap<StructType>(StructTy);
+ return unwrap(TD)->getStructLayout(STy)->getElementContainingOffset(Offset);
+}
+
+unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructTy,
+ unsigned Element) {
+ const StructType *STy = unwrap<StructType>(StructTy);
+ return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element);
+}
+
+void LLVMInvalidateStructLayout(LLVMTargetDataRef TD, LLVMTypeRef StructTy) {
+ unwrap(TD)->InvalidateStructLayoutInfo(unwrap<StructType>(StructTy));
+}
+
+void LLVMDisposeTargetData(LLVMTargetDataRef TD) {
+ delete unwrap(TD);
+}
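+
+// A minimal usage sketch (illustrative, not part of this file): create a
+// TargetData from a layout string, query it, then dispose of it.
+//
+//   LLVMTargetDataRef TD = LLVMCreateTargetData("e-p:32:32");
+//   unsigned PtrBytes = LLVMPointerSize(TD);  // 4 for this layout
+//   LLVMDisposeTargetData(TD);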
diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp
new file mode 100644
index 000000000000..6a2de6f582a6
--- /dev/null
+++ b/lib/Target/TargetAsmInfo.cpp
@@ -0,0 +1,461 @@
+//===-- TargetAsmInfo.cpp - Asm Info ---------------------------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Dwarf.h"
+#include <cctype>
+#include <cstring>
+
+using namespace llvm;
+
+void TargetAsmInfo::fillDefaultValues() {
+ BSSSection = "\t.bss";
+ BSSSection_ = 0;
+ ReadOnlySection = 0;
+ SmallDataSection = 0;
+ SmallBSSSection = 0;
+ SmallRODataSection = 0;
+ TLSDataSection = 0;
+ TLSBSSSection = 0;
+ ZeroFillDirective = 0;
+ NonexecutableStackDirective = 0;
+ NeedsSet = false;
+ MaxInstLength = 4;
+ PCSymbol = "$";
+ SeparatorChar = ';';
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".";
+ LessPrivateGlobalPrefix = "";
+ JumpTableSpecialLabelPrefix = 0;
+ GlobalVarAddrPrefix = "";
+ GlobalVarAddrSuffix = "";
+ FunctionAddrPrefix = "";
+ FunctionAddrSuffix = "";
+ PersonalityPrefix = "";
+ PersonalitySuffix = "";
+ NeedsIndirectEncoding = false;
+ InlineAsmStart = "#APP";
+ InlineAsmEnd = "#NO_APP";
+ AssemblerDialect = 0;
+ StringConstantPrefix = ".str";
+ ZeroDirective = "\t.zero\t";
+ ZeroDirectiveSuffix = 0;
+ AsciiDirective = "\t.ascii\t";
+ AscizDirective = "\t.asciz\t";
+ Data8bitsDirective = "\t.byte\t";
+ Data16bitsDirective = "\t.short\t";
+ Data32bitsDirective = "\t.long\t";
+ Data64bitsDirective = "\t.quad\t";
+ AlignDirective = "\t.align\t";
+ AlignmentIsInBytes = true;
+ TextAlignFillValue = 0;
+ SwitchToSectionDirective = "\t.section\t";
+ TextSectionStartSuffix = "";
+ DataSectionStartSuffix = "";
+ SectionEndDirectiveSuffix = 0;
+ ConstantPoolSection = "\t.section .rodata";
+ JumpTableDataSection = "\t.section .rodata";
+ JumpTableDirective = 0;
+ CStringSection = 0;
+ CStringSection_ = 0;
+ // FIXME: Flags are ELFish - replace with normal section stuff.
+ StaticCtorsSection = "\t.section .ctors,\"aw\",@progbits";
+ StaticDtorsSection = "\t.section .dtors,\"aw\",@progbits";
+ GlobalDirective = "\t.globl\t";
+ SetDirective = 0;
+ LCOMMDirective = 0;
+ COMMDirective = "\t.comm\t";
+ COMMDirectiveTakesAlignment = true;
+ HasDotTypeDotSizeDirective = true;
+ HasSingleParameterDotFile = true;
+ UsedDirective = 0;
+ WeakRefDirective = 0;
+ WeakDefDirective = 0;
+ // FIXME: These are ELFish - move to ELFTAI.
+ HiddenDirective = "\t.hidden\t";
+ ProtectedDirective = "\t.protected\t";
+ AbsoluteDebugSectionOffsets = false;
+ AbsoluteEHSectionOffsets = false;
+ HasLEB128 = false;
+ HasDotLocAndDotFile = false;
+ SupportsDebugInformation = false;
+ SupportsExceptionHandling = false;
+ DwarfRequiresFrameSection = true;
+ DwarfUsesInlineInfoSection = false;
+ SupportsMacInfoSection = true;
+ NonLocalEHFrameLabel = false;
+ GlobalEHDirective = 0;
+ SupportsWeakOmittedEHFrame = true;
+ DwarfSectionOffsetDirective = 0;
+ DwarfAbbrevSection = ".debug_abbrev";
+ DwarfInfoSection = ".debug_info";
+ DwarfLineSection = ".debug_line";
+ DwarfFrameSection = ".debug_frame";
+ DwarfPubNamesSection = ".debug_pubnames";
+ DwarfPubTypesSection = ".debug_pubtypes";
+ DwarfDebugInlineSection = ".debug_inlined";
+ DwarfStrSection = ".debug_str";
+ DwarfLocSection = ".debug_loc";
+ DwarfARangesSection = ".debug_aranges";
+ DwarfRangesSection = ".debug_ranges";
+ DwarfMacInfoSection = ".debug_macinfo";
+ DwarfEHFrameSection = ".eh_frame";
+ DwarfExceptionSection = ".gcc_except_table";
+ AsmTransCBE = 0;
+ TextSection = getUnnamedSection("\t.text", SectionFlags::Code);
+ DataSection = getUnnamedSection("\t.data", SectionFlags::Writeable);
+}
+
+TargetAsmInfo::TargetAsmInfo(const TargetMachine &tm)
+ : TM(tm) {
+ fillDefaultValues();
+}
+
+TargetAsmInfo::~TargetAsmInfo() {
+}
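+
+// Usage sketch (illustrative; "FooTargetAsmInfo" is a hypothetical subclass):
+// a target overrides whichever of the defaults above do not match its
+// assembler from its own constructor.
+#if 0
+FooTargetAsmInfo::FooTargetAsmInfo(const TargetMachine &TM)
+  : TargetAsmInfo(TM) {
+  CommentString = "@";          // assembler uses '@' to start comments
+  Data64bitsDirective = 0;      // no native 64-bit data directive
+  AlignmentIsInBytes = false;   // .align takes a power of two, not bytes
+}
+#endif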
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorChar or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorChar or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden in the target code to do that.
+unsigned TargetAsmInfo::getInlineAsmLength(const char *Str) const {
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || *Str == SeparatorChar)
+ atInsnStart = true;
+ if (atInsnStart && !isspace(*Str)) {
+ Length += MaxInstLength;
+ atInsnStart = false;
+ }
+ if (atInsnStart && strncmp(Str, CommentString, strlen(CommentString))==0)
+ atInsnStart = false;
+ }
+
+ return Length;
+}
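+
+// Worked example (illustrative; TAI is any TargetAsmInfo instance): with the
+// defaults set above (MaxInstLength = 4, SeparatorChar = ';'), a string with
+// two instruction starts is measured as 8 bytes:
+//
+//   unsigned Len = TAI->getInlineAsmLength("mov r0, r1; add r2, r3");  // 8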
+
+unsigned TargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ return dwarf::DW_EH_PE_absptr;
+}
+
+static bool isSuitableForBSS(const GlobalVariable *GV) {
+ if (!GV->hasInitializer())
+ return true;
+
+ // Leave constant zeros in readonly constant sections, so they can be shared
+ Constant *C = GV->getInitializer();
+ return (C->isNullValue() && !GV->isConstant() && !NoZerosInBSS);
+}
+
+static bool isConstantString(const Constant *C) {
+  // First check: do we have a constant array of i8 terminated with zero?
+  const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+  // Check if the initializer is a null-terminated string.
+ if (CVA && CVA->isCString())
+ return true;
+
+ // Another possibility: [1 x i8] zeroinitializer
+ if (isa<ConstantAggregateZero>(C)) {
+ if (const ArrayType *Ty = dyn_cast<ArrayType>(C->getType())) {
+ return (Ty->getElementType() == Type::Int8Ty &&
+ Ty->getNumElements() == 1);
+ }
+ }
+
+ return false;
+}
+
+unsigned TargetAsmInfo::RelocBehaviour() const {
+ // By default - all relocations in PIC mode would force symbol to be
+ // placed in r/w section.
+ return (TM.getRelocationModel() != Reloc::Static ?
+ Reloc::LocalOrGlobal : Reloc::None);
+}
+
+SectionKind::Kind
+TargetAsmInfo::SectionKindForGlobal(const GlobalValue *GV) const {
+ // Early exit - functions should be always in text sections.
+ if (isa<Function>(GV))
+ return SectionKind::Text;
+
+  const GlobalVariable* GVar = dyn_cast<GlobalVariable>(GV);
+  assert(GVar && "Invalid global value for section selection");
+  bool isThreadLocal = GVar->isThreadLocal();
+
+ if (isSuitableForBSS(GVar)) {
+ // Variable can be easily put to BSS section.
+ return (isThreadLocal ? SectionKind::ThreadBSS : SectionKind::BSS);
+ } else if (GVar->isConstant() && !isThreadLocal) {
+    // Now we know that the variable has an initializer and that it is
+    // constant. We need to check its initializer to decide which section to
+    // output it into. Also note that there is no thread-local r/o section.
+ Constant *C = GVar->getInitializer();
+ if (C->ContainsRelocations(Reloc::LocalOrGlobal)) {
+      // Decide whether the symbol can still go into an r/o section.
+ unsigned Reloc = RelocBehaviour();
+
+      // We already did a query for 'all' relocs, so take the early exits.
+ if (Reloc == Reloc::LocalOrGlobal)
+ return SectionKind::Data;
+ else if (Reloc == Reloc::None)
+ return SectionKind::ROData;
+ else {
+ // Ok, target wants something funny. Honour it.
+ return (C->ContainsRelocations(Reloc) ?
+ SectionKind::Data : SectionKind::ROData);
+ }
+ } else {
+      // Check if the initializer is a null-terminated string.
+ if (isConstantString(C))
+ return SectionKind::RODataMergeStr;
+ else
+ return SectionKind::RODataMergeConst;
+ }
+ }
+
+  // Variable is either non-constant or thread-local - output to data section.
+ return (isThreadLocal ? SectionKind::ThreadData : SectionKind::Data);
+}
+
+unsigned
+TargetAsmInfo::SectionFlagsForGlobal(const GlobalValue *GV,
+ const char* Name) const {
+ unsigned Flags = SectionFlags::None;
+
+ // Decode flags from global itself.
+ if (GV) {
+ SectionKind::Kind Kind = SectionKindForGlobal(GV);
+ switch (Kind) {
+ case SectionKind::Text:
+ Flags |= SectionFlags::Code;
+ break;
+ case SectionKind::ThreadData:
+ case SectionKind::ThreadBSS:
+ Flags |= SectionFlags::TLS;
+ // FALLS THROUGH
+ case SectionKind::Data:
+ case SectionKind::DataRel:
+ case SectionKind::DataRelLocal:
+ case SectionKind::DataRelRO:
+ case SectionKind::DataRelROLocal:
+ case SectionKind::BSS:
+ Flags |= SectionFlags::Writeable;
+ break;
+ case SectionKind::ROData:
+ case SectionKind::RODataMergeStr:
+ case SectionKind::RODataMergeConst:
+ // No additional flags here
+ break;
+ case SectionKind::SmallData:
+ case SectionKind::SmallBSS:
+ Flags |= SectionFlags::Writeable;
+ // FALLS THROUGH
+ case SectionKind::SmallROData:
+ Flags |= SectionFlags::Small;
+ break;
+ default:
+ assert(0 && "Unexpected section kind!");
+ }
+
+ if (GV->isWeakForLinker())
+ Flags |= SectionFlags::Linkonce;
+ }
+
+ // Add flags from sections, if any.
+ if (Name && *Name) {
+ Flags |= SectionFlags::Named;
+
+ // Some lame default implementation based on some magic section names.
+ if (strncmp(Name, ".gnu.linkonce.b.", 16) == 0 ||
+ strncmp(Name, ".llvm.linkonce.b.", 17) == 0 ||
+ strncmp(Name, ".gnu.linkonce.sb.", 17) == 0 ||
+ strncmp(Name, ".llvm.linkonce.sb.", 18) == 0)
+ Flags |= SectionFlags::BSS;
+ else if (strcmp(Name, ".tdata") == 0 ||
+ strncmp(Name, ".tdata.", 7) == 0 ||
+ strncmp(Name, ".gnu.linkonce.td.", 17) == 0 ||
+ strncmp(Name, ".llvm.linkonce.td.", 18) == 0)
+ Flags |= SectionFlags::TLS;
+ else if (strcmp(Name, ".tbss") == 0 ||
+ strncmp(Name, ".tbss.", 6) == 0 ||
+ strncmp(Name, ".gnu.linkonce.tb.", 17) == 0 ||
+ strncmp(Name, ".llvm.linkonce.tb.", 18) == 0)
+ Flags |= SectionFlags::BSS | SectionFlags::TLS;
+ }
+
+ return Flags;
+}
+
+const Section*
+TargetAsmInfo::SectionForGlobal(const GlobalValue *GV) const {
+ const Section* S;
+ // Select section name
+ if (GV->hasSection()) {
+ // Honour section already set, if any
+ unsigned Flags = SectionFlagsForGlobal(GV,
+ GV->getSection().c_str());
+ S = getNamedSection(GV->getSection().c_str(), Flags);
+ } else {
+ // Use default section depending on the 'type' of global
+ S = SelectSectionForGlobal(GV);
+ }
+
+ return S;
+}
+
+// Lame default implementation. Calculate the section name for global.
+const Section*
+TargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind Kind = SectionKindForGlobal(GV);
+
+ if (GV->isWeakForLinker()) {
+ std::string Name = UniqueSectionForGlobal(GV, Kind);
+ unsigned Flags = SectionFlagsForGlobal(GV, Name.c_str());
+ return getNamedSection(Name.c_str(), Flags);
+ } else {
+ if (Kind == SectionKind::Text)
+ return getTextSection();
+ else if (isBSS(Kind) && getBSSSection_())
+ return getBSSSection_();
+ else if (getReadOnlySection() && SectionKind::isReadOnly(Kind))
+ return getReadOnlySection();
+ }
+
+ return getDataSection();
+}
+
+// Lame default implementation. Calculate the section name for machine const.
+const Section*
+TargetAsmInfo::SelectSectionForMachineConst(const Type *Ty) const {
+ // FIXME: Support data.rel stuff someday
+ return getDataSection();
+}
+
+std::string
+TargetAsmInfo::UniqueSectionForGlobal(const GlobalValue* GV,
+ SectionKind::Kind Kind) const {
+ switch (Kind) {
+ case SectionKind::Text:
+ return ".gnu.linkonce.t." + GV->getName();
+ case SectionKind::Data:
+ return ".gnu.linkonce.d." + GV->getName();
+ case SectionKind::DataRel:
+ return ".gnu.linkonce.d.rel" + GV->getName();
+ case SectionKind::DataRelLocal:
+ return ".gnu.linkonce.d.rel.local" + GV->getName();
+ case SectionKind::DataRelRO:
+ return ".gnu.linkonce.d.rel.ro" + GV->getName();
+ case SectionKind::DataRelROLocal:
+ return ".gnu.linkonce.d.rel.ro.local" + GV->getName();
+ case SectionKind::SmallData:
+ return ".gnu.linkonce.s." + GV->getName();
+ case SectionKind::BSS:
+ return ".gnu.linkonce.b." + GV->getName();
+ case SectionKind::SmallBSS:
+ return ".gnu.linkonce.sb." + GV->getName();
+ case SectionKind::ROData:
+ case SectionKind::RODataMergeConst:
+ case SectionKind::RODataMergeStr:
+ return ".gnu.linkonce.r." + GV->getName();
+ case SectionKind::SmallROData:
+ return ".gnu.linkonce.s2." + GV->getName();
+ case SectionKind::ThreadData:
+ return ".gnu.linkonce.td." + GV->getName();
+ case SectionKind::ThreadBSS:
+ return ".gnu.linkonce.tb." + GV->getName();
+ default:
+ assert(0 && "Unknown section kind");
+ }
+  return "";   // Not reached when assertions are enabled.
+}
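+
+// Worked example (illustrative): a linkonce function named "foo" gets the
+// unique section ".gnu.linkonce.t.foo"; a weak read-only global "bar" gets
+// ".gnu.linkonce.r.bar".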
+
+const Section*
+TargetAsmInfo::getNamedSection(const char *Name, unsigned Flags,
+ bool Override) const {
+ Section& S = Sections[Name];
+
+  // This is a newly-created section, so set it up properly.
+ if (S.Flags == SectionFlags::Invalid || Override) {
+ S.Flags = Flags | SectionFlags::Named;
+ S.Name = Name;
+ }
+
+ return &S;
+}
+
+const Section*
+TargetAsmInfo::getUnnamedSection(const char *Directive, unsigned Flags,
+ bool Override) const {
+ Section& S = Sections[Directive];
+
+  // This is a newly-created section, so set it up properly.
+ if (S.Flags == SectionFlags::Invalid || Override) {
+ S.Flags = Flags & ~SectionFlags::Named;
+ S.Name = Directive;
+ }
+
+ return &S;
+}
+
+const std::string&
+TargetAsmInfo::getSectionFlags(unsigned Flags) const {
+ SectionFlags::FlagsStringsMapType::iterator I = FlagsStrings.find(Flags);
+
+  // We haven't printed these flags yet; print them and save them to the map.
+  // This reduces the amount of heap thrashing due to std::string
+  // construction / concatenation.
+ if (I == FlagsStrings.end())
+ I = FlagsStrings.insert(std::make_pair(Flags,
+ printSectionFlags(Flags))).first;
+
+ return I->second;
+}
+
+unsigned TargetAsmInfo::getULEB128Size(unsigned Value) {
+ unsigned Size = 0;
+ do {
+ Value >>= 7;
+ Size += sizeof(int8_t);
+ } while (Value);
+ return Size;
+}
+
+unsigned TargetAsmInfo::getSLEB128Size(int Value) {
+ unsigned Size = 0;
+ int Sign = Value >> (8 * sizeof(Value) - 1);
+ bool IsMore;
+
+ do {
+ unsigned Byte = Value & 0x7f;
+ Value >>= 7;
+ IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+ Size += sizeof(int8_t);
+ } while (IsMore);
+ return Size;
+}
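+
+// Worked examples (illustrative): getULEB128Size(127) == 1 and
+// getULEB128Size(128) == 2, since each LEB128 byte carries 7 value bits.
+// getSLEB128Size(63) == 1 but getSLEB128Size(64) == 2, because bit 0x40 of
+// the final byte would otherwise be misread as a sign bit.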
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
new file mode 100644
index 000000000000..67fefbb70b6a
--- /dev/null
+++ b/lib/Target/TargetData.cpp
@@ -0,0 +1,603 @@
+//===-- TargetData.cpp - Data size & alignment routines --------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target properties related to datatype size/offset/alignment
+// information.
+//
+// This structure should be created once, filled in if the defaults are not
+// correct, and then passed around by const&. None of the member functions
+// require modification of the object.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetData.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cstdlib>
+using namespace llvm;
+
+// Handle the Pass registration stuff necessary to use TargetData's.
+
+// Register the TargetData pass...
+static RegisterPass<TargetData> X("targetdata", "Target Data Layout", false,
+ true);
+char TargetData::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Support for StructLayout
+//===----------------------------------------------------------------------===//
+
+StructLayout::StructLayout(const StructType *ST, const TargetData &TD) {
+ StructAlignment = 0;
+ StructSize = 0;
+ NumElements = ST->getNumElements();
+
+ // Loop over each of the elements, placing them in memory.
+ for (unsigned i = 0, e = NumElements; i != e; ++i) {
+ const Type *Ty = ST->getElementType(i);
+ unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
+
+ // Add padding if necessary to align the data element properly.
+ if ((StructSize & (TyAlign-1)) != 0)
+ StructSize = TargetData::RoundUpAlignment(StructSize, TyAlign);
+
+ // Keep track of maximum alignment constraint.
+ StructAlignment = std::max(TyAlign, StructAlignment);
+
+ MemberOffsets[i] = StructSize;
+ StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
+ }
+
+ // Empty structures have alignment of 1 byte.
+ if (StructAlignment == 0) StructAlignment = 1;
+
+ // Add padding to the end of the struct so that it could be put in an array
+ // and all array elements would be aligned correctly.
+ if ((StructSize & (StructAlignment-1)) != 0)
+ StructSize = TargetData::RoundUpAlignment(StructSize, StructAlignment);
+}
+
+
+/// getElementContainingOffset - Given a valid offset into the structure,
+/// return the structure index that contains it.
+unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
+ const uint64_t *SI =
+ std::upper_bound(&MemberOffsets[0], &MemberOffsets[NumElements], Offset);
+ assert(SI != &MemberOffsets[0] && "Offset not in structure type!");
+ --SI;
+ assert(*SI <= Offset && "upper_bound didn't work");
+ assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) &&
+ (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) &&
+ "Upper bound didn't work!");
+
+ // Multiple fields can have the same offset if any of them are zero sized.
+ // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
+ // at the i32 element, because it is the last element at that offset. This is
+ // the right one to return, because anything after it will have a higher
+ // offset, implying that this element is non-empty.
+ return SI-&MemberOffsets[0];
+}
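+
+// Usage sketch (illustrative only, not part of this patch): mapping a byte
+// offset back to a field index through the lazily cached StructLayout.
+#if 0
+static unsigned fieldAtOffsetExample(const TargetData &TD,
+                                     const StructType *STy, uint64_t Offset) {
+  const StructLayout *SL = TD.getStructLayout(STy);
+  unsigned Idx = SL->getElementContainingOffset(Offset);
+  assert(SL->getElementOffset(Idx) <= Offset && "field starts past the offset?");
+  return Idx;
+}
+#endif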
+
+//===----------------------------------------------------------------------===//
+// TargetAlignElem, TargetAlign support
+//===----------------------------------------------------------------------===//
+
+TargetAlignElem
+TargetAlignElem::get(AlignTypeEnum align_type, unsigned char abi_align,
+ unsigned char pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ TargetAlignElem retval;
+ retval.AlignType = align_type;
+ retval.ABIAlign = abi_align;
+ retval.PrefAlign = pref_align;
+ retval.TypeBitWidth = bit_width;
+ return retval;
+}
+
+bool
+TargetAlignElem::operator==(const TargetAlignElem &rhs) const {
+ return (AlignType == rhs.AlignType
+ && ABIAlign == rhs.ABIAlign
+ && PrefAlign == rhs.PrefAlign
+ && TypeBitWidth == rhs.TypeBitWidth);
+}
+
+std::ostream &
+TargetAlignElem::dump(std::ostream &os) const {
+ return os << AlignType
+ << TypeBitWidth
+ << ":" << (int) (ABIAlign * 8)
+ << ":" << (int) (PrefAlign * 8);
+}
+
+const TargetAlignElem TargetData::InvalidAlignmentElem =
+ TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0);
+
+//===----------------------------------------------------------------------===//
+// TargetData Class Implementation
+//===----------------------------------------------------------------------===//
+
+/*!
+ A TargetDescription string consists of a sequence of hyphen-delimited
+ specifiers for target endianness, pointer size and alignments, and various
+ primitive type sizes and alignments. A typical string looks something like:
+ <br><br>
+ "E-p:32:32:32-i1:8:8-i8:8:8-i32:32:32-i64:32:64-f32:32:32-f64:32:64"
+ <br><br>
+ (note: this string is not fully specified and is only an example.)
+ \p
+ Alignments come in two flavors: ABI and preferred. ABI alignment (abi_align,
+ below) dictates how a type will be aligned within an aggregate and when used
+ as an argument. Preferred alignment (pref_align, below) determines a type's
+ alignment when emitted as a global.
+ \p
+ Specifier string details:
+ <br><br>
+ <i>[E|e]</i>: Endianness. "E" specifies a big-endian target data model, "e"
+ specifies a little-endian target data model.
+ <br><br>
+ <i>p:@verbatim<size>:<abi_align>:<pref_align>@endverbatim</i>: Pointer size,
+ ABI and preferred alignment.
+ <br><br>
+ <i>@verbatim<type><size>:<abi_align>:<pref_align>@endverbatim</i>: Numeric type
+ alignment. Type is
+ one of <i>i|f|v|a</i>, corresponding to integer, floating point, vector (aka
+ packed) or aggregate. Size indicates the size, e.g., 32 or 64 bits.
+ \p
+  The default string, fully specified, is:
+ <br><br>
+ "E-p:64:64:64-a0:0:0-f32:32:32-f64:0:64"
+ "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:0:64"
+ "-v64:64:64-v128:128:128"
+ <br><br>
+ Note that in the case of aggregates, 0 is the default ABI and preferred
+ alignment. This is a special case, where the aggregate's computed worst-case
+ alignment will be used.
+ */
+void TargetData::init(const std::string &TargetDescription) {
+ std::string temp = TargetDescription;
+
+ LittleEndian = false;
+ PointerMemSize = 8;
+ PointerABIAlign = 8;
+ PointerPrefAlign = PointerABIAlign;
+
+ // Default alignments
+ setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1
+ setAlignment(INTEGER_ALIGN, 1, 1, 8); // i8
+ setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16
+ setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32
+ setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64
+ setAlignment(FLOAT_ALIGN, 4, 4, 32); // float
+ setAlignment(FLOAT_ALIGN, 8, 8, 64); // double
+ setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32
+ setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
+ setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct, union, class, ...
+
+ while (!temp.empty()) {
+ std::string token = getToken(temp, "-");
+ std::string arg0 = getToken(token, ":");
+ const char *p = arg0.c_str();
+ switch(*p) {
+ case 'E':
+ LittleEndian = false;
+ break;
+ case 'e':
+ LittleEndian = true;
+ break;
+ case 'p':
+ PointerMemSize = atoi(getToken(token,":").c_str()) / 8;
+ PointerABIAlign = atoi(getToken(token,":").c_str()) / 8;
+ PointerPrefAlign = atoi(getToken(token,":").c_str()) / 8;
+ if (PointerPrefAlign == 0)
+ PointerPrefAlign = PointerABIAlign;
+ break;
+ case 'i':
+ case 'v':
+ case 'f':
+ case 'a':
+ case 's': {
+ AlignTypeEnum align_type = STACK_ALIGN; // Dummy init, silence warning
+ switch(*p) {
+ case 'i': align_type = INTEGER_ALIGN; break;
+ case 'v': align_type = VECTOR_ALIGN; break;
+ case 'f': align_type = FLOAT_ALIGN; break;
+ case 'a': align_type = AGGREGATE_ALIGN; break;
+ case 's': align_type = STACK_ALIGN; break;
+ }
+ uint32_t size = (uint32_t) atoi(++p);
+ unsigned char abi_align = atoi(getToken(token, ":").c_str()) / 8;
+ unsigned char pref_align = atoi(getToken(token, ":").c_str()) / 8;
+ if (pref_align == 0)
+ pref_align = abi_align;
+ setAlignment(align_type, abi_align, pref_align, size);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
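+
+// Worked example (illustrative): init("e-p:32:32:32-i64:32:64") keeps the
+// defaults above except that it sets LittleEndian = true, makes pointers
+// 4 bytes in size and alignment, and re-registers the i64 entry with
+// ABIAlign = 4 and PrefAlign = 8 (all alignments are stored in bytes).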
+
+TargetData::TargetData(const Module *M)
+ : ImmutablePass(&ID) {
+ init(M->getDataLayout());
+}
+
+void
+TargetData::setAlignment(AlignTypeEnum align_type, unsigned char abi_align,
+ unsigned char pref_align, uint32_t bit_width) {
+ assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == align_type &&
+ Alignments[i].TypeBitWidth == bit_width) {
+ // Update the abi, preferred alignments.
+ Alignments[i].ABIAlign = abi_align;
+ Alignments[i].PrefAlign = pref_align;
+ return;
+ }
+ }
+
+ Alignments.push_back(TargetAlignElem::get(align_type, abi_align,
+ pref_align, bit_width));
+}
+
+/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or
+/// preferred if ABIInfo = false) the target wants for the specified datatype.
+unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
+ uint32_t BitWidth, bool ABIInfo,
+ const Type *Ty) const {
+ // Check to see if we have an exact match and remember the best match we see.
+ int BestMatchIdx = -1;
+ int LargestInt = -1;
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ if (Alignments[i].AlignType == AlignType &&
+ Alignments[i].TypeBitWidth == BitWidth)
+ return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
+
+ // The best match so far depends on what we're looking for.
+ if (AlignType == VECTOR_ALIGN && Alignments[i].AlignType == VECTOR_ALIGN) {
+ // If this is a specification for a smaller vector type, we will fall back
+ // to it. This happens because <128 x double> can be implemented in terms
+ // of 64 <2 x double>.
+ if (Alignments[i].TypeBitWidth < BitWidth) {
+ // Verify that we pick the biggest of the fallbacks.
+ if (BestMatchIdx == -1 ||
+ Alignments[BestMatchIdx].TypeBitWidth < Alignments[i].TypeBitWidth)
+ BestMatchIdx = i;
+ }
+ } else if (AlignType == INTEGER_ALIGN &&
+ Alignments[i].AlignType == INTEGER_ALIGN) {
+ // The "best match" for integers is the smallest size that is larger than
+ // the BitWidth requested.
+ if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
+ Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
+ BestMatchIdx = i;
+ // However, if there isn't one that's larger, then we must use the
+ // largest one we have (see below)
+ if (LargestInt == -1 ||
+ Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
+ LargestInt = i;
+ }
+ }
+
+ // Okay, we didn't find an exact solution. Fall back here depending on what
+ // is being looked for.
+ if (BestMatchIdx == -1) {
+ // If we didn't find an integer alignment, fall back on most conservative.
+ if (AlignType == INTEGER_ALIGN) {
+ BestMatchIdx = LargestInt;
+ } else {
+ assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!");
+
+ // If we didn't find a vector size that is smaller or equal to this type,
+ // then we will end up scalarizing this to its element type. Just return
+ // the alignment of the element.
+ return getAlignment(cast<VectorType>(Ty)->getElementType(), ABIInfo);
+ }
+ }
+
+ // Since we got a "best match" index, just return it.
+ return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
+ : Alignments[BestMatchIdx].PrefAlign;
+}
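+
+// Worked example (illustrative): with the default table from init(), asking
+// for the ABI alignment of a 24-bit integer finds no exact entry, so the
+// smallest wider integer entry (i32) is used and 4 is returned; a 128-bit
+// integer has no wider entry, so the largest one (i64) is used instead.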
+
+namespace {
+
+/// LayoutInfo - The lazy cache of structure layout information maintained by
+/// TargetData. Note that the struct types must have been freed before
+/// llvm_shutdown is called (and thus this is deallocated) because all the
+/// targets with cached elements should have been destroyed.
+///
+typedef std::pair<const TargetData*,const StructType*> LayoutKey;
+
+struct DenseMapLayoutKeyInfo {
+ static inline LayoutKey getEmptyKey() { return LayoutKey(0, 0); }
+ static inline LayoutKey getTombstoneKey() {
+ return LayoutKey((TargetData*)(intptr_t)-1, 0);
+ }
+ static unsigned getHashValue(const LayoutKey &Val) {
+ return DenseMapInfo<void*>::getHashValue(Val.first) ^
+ DenseMapInfo<void*>::getHashValue(Val.second);
+ }
+ static bool isEqual(const LayoutKey &LHS, const LayoutKey &RHS) {
+ return LHS == RHS;
+ }
+
+ static bool isPod() { return true; }
+};
+
+typedef DenseMap<LayoutKey, StructLayout*, DenseMapLayoutKeyInfo> LayoutInfoTy;
+
+}
+
+static ManagedStatic<LayoutInfoTy> LayoutInfo;
+
+TargetData::~TargetData() {
+ if (!LayoutInfo.isConstructed())
+ return;
+
+ // Remove any layouts for this TD.
+ LayoutInfoTy &TheMap = *LayoutInfo;
+ for (LayoutInfoTy::iterator I = TheMap.begin(), E = TheMap.end(); I != E; ) {
+ if (I->first.first == this) {
+ I->second->~StructLayout();
+ free(I->second);
+ TheMap.erase(I++);
+ } else {
+ ++I;
+ }
+ }
+}
+
+const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
+ LayoutInfoTy &TheMap = *LayoutInfo;
+
+ StructLayout *&SL = TheMap[LayoutKey(this, Ty)];
+ if (SL) return SL;
+
+ // Otherwise, create the struct layout. Because it is variable length, we
+ // malloc it, then use placement new.
+ int NumElts = Ty->getNumElements();
+ StructLayout *L =
+ (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1)*sizeof(uint64_t));
+
+ // Set SL before calling StructLayout's ctor. The ctor could cause other
+ // entries to be added to TheMap, invalidating our reference.
+ SL = L;
+
+ new (L) StructLayout(Ty, *this);
+ return L;
+}
+
+/// InvalidateStructLayoutInfo - TargetData speculatively caches StructLayout
+/// objects. If a TargetData object is alive when types are being refined and
+/// removed, this method must be called whenever a StructType is removed to
+/// avoid a dangling pointer in this cache.
+void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
+ if (!LayoutInfo.isConstructed()) return; // No cache.
+
+ LayoutInfoTy::iterator I = LayoutInfo->find(LayoutKey(this, Ty));
+ if (I == LayoutInfo->end()) return;
+
+ I->second->~StructLayout();
+ free(I->second);
+ LayoutInfo->erase(I);
+}
+
+
+std::string TargetData::getStringRepresentation() const {
+ std::string repr;
+ repr.append(LittleEndian ? "e" : "E");
+ repr.append("-p:").append(itostr((int64_t) (PointerMemSize * 8))).
+ append(":").append(itostr((int64_t) (PointerABIAlign * 8))).
+ append(":").append(itostr((int64_t) (PointerPrefAlign * 8)));
+ for (align_const_iterator I = Alignments.begin();
+ I != Alignments.end();
+ ++I) {
+ repr.append("-").append(1, (char) I->AlignType).
+ append(utostr((int64_t) I->TypeBitWidth)).
+ append(":").append(utostr((uint64_t) (I->ABIAlign * 8))).
+ append(":").append(utostr((uint64_t) (I->PrefAlign * 8)));
+ }
+ return repr;
+}
+
+
+uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ case Type::LabelTyID:
+ case Type::PointerTyID:
+ return getPointerSizeInBits();
+ case Type::ArrayTyID: {
+ const ArrayType *ATy = cast<ArrayType>(Ty);
+ return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements();
+ }
+ case Type::StructTyID:
+ // Get the layout annotation... which is lazily created on demand.
+ return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
+ case Type::IntegerTyID:
+ return cast<IntegerType>(Ty)->getBitWidth();
+ case Type::VoidTyID:
+ return 8;
+ case Type::FloatTyID:
+ return 32;
+ case Type::DoubleTyID:
+ return 64;
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID:
+ return 128;
+ // In memory objects this is always aligned to a higher boundary, but
+ // only 80 bits contain information.
+ case Type::X86_FP80TyID:
+ return 80;
+ case Type::VectorTyID:
+ return cast<VectorType>(Ty)->getBitWidth();
+ default:
+ assert(0 && "TargetData::getTypeSizeInBits(): Unsupported type");
+ break;
+ }
+ return 0;
+}
+
+/*!
+ \param abi_or_pref Flag that determines which alignment is returned. true
+ returns the ABI alignment, false returns the preferred alignment.
+ \param Ty The underlying type for which alignment is determined.
+
+ Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
+ == false) for the requested type \a Ty.
+ */
+unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
+ int AlignType = -1;
+
+ assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
+ switch (Ty->getTypeID()) {
+ // Early escape for the non-numeric types.
+ case Type::LabelTyID:
+ case Type::PointerTyID:
+ return (abi_or_pref
+ ? getPointerABIAlignment()
+ : getPointerPrefAlignment());
+ case Type::ArrayTyID:
+ return getAlignment(cast<ArrayType>(Ty)->getElementType(), abi_or_pref);
+
+ case Type::StructTyID: {
+ // Packed structure types always have an ABI alignment of one.
+ if (cast<StructType>(Ty)->isPacked() && abi_or_pref)
+ return 1;
+
+ // Get the layout annotation... which is lazily created on demand.
+ const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
+ unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
+ return std::max(Align, (unsigned)Layout->getAlignment());
+ }
+ case Type::IntegerTyID:
+ case Type::VoidTyID:
+ AlignType = INTEGER_ALIGN;
+ break;
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ // PPC_FP128TyID and FP128TyID have different data contents, but the
+ // same size and alignment, so they look the same here.
+ case Type::PPC_FP128TyID:
+ case Type::FP128TyID:
+ case Type::X86_FP80TyID:
+ AlignType = FLOAT_ALIGN;
+ break;
+ case Type::VectorTyID:
+ AlignType = VECTOR_ALIGN;
+ break;
+ default:
+ assert(0 && "Bad type for getAlignment!!!");
+ break;
+ }
+
+ return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty),
+ abi_or_pref, Ty);
+}
+
+unsigned char TargetData::getABITypeAlignment(const Type *Ty) const {
+ return getAlignment(Ty, true);
+}
+
+unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
+ if (Alignments[i].AlignType == STACK_ALIGN)
+ return Alignments[i].ABIAlign;
+
+ return getABITypeAlignment(Ty);
+}
+
+unsigned char TargetData::getPrefTypeAlignment(const Type *Ty) const {
+ return getAlignment(Ty, false);
+}
+
+unsigned char TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
+ unsigned Align = (unsigned) getPrefTypeAlignment(Ty);
+ assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+ return Log2_32(Align);
+}
+
+/// getIntPtrType - Return an unsigned integer type that is the same size as
+/// or greater than the target pointer size.
+const IntegerType *TargetData::getIntPtrType() const {
+ return IntegerType::get(getPointerSizeInBits());
+}
+
+
+uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
+ unsigned NumIndices) const {
+ const Type *Ty = ptrTy;
+ assert(isa<PointerType>(Ty) && "Illegal argument for getIndexedOffset()");
+ uint64_t Result = 0;
+
+ generic_gep_type_iterator<Value* const*>
+ TI = gep_type_begin(ptrTy, Indices, Indices+NumIndices);
+ for (unsigned CurIDX = 0; CurIDX != NumIndices; ++CurIDX, ++TI) {
+ if (const StructType *STy = dyn_cast<StructType>(*TI)) {
+ assert(Indices[CurIDX]->getType() == Type::Int32Ty &&
+ "Illegal struct idx");
+ unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
+
+ // Get structure layout information...
+ const StructLayout *Layout = getStructLayout(STy);
+
+ // Add in the offset, as calculated by the structure layout info...
+ Result += Layout->getElementOffset(FieldNo);
+
+ // Update Ty to refer to current element
+ Ty = STy->getElementType(FieldNo);
+ } else {
+ // Update Ty to refer to current element
+ Ty = cast<SequentialType>(Ty)->getElementType();
+
+ // Get the array index and the size of each array element.
+ int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue();
+ Result += arrayIdx * (int64_t)getTypeAllocSize(Ty);
+ }
+ }
+
+ return Result;
+}
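+
+// Usage sketch (illustrative; the struct type and indices are hypothetical):
+// computing the byte offset selected by a GEP-style index list, i.e. the
+// equivalent of "getelementptr %STy* %p, i32 0, i32 1".
+#if 0
+static uint64_t gepOffsetExample(const TargetData &TD, const StructType *STy) {
+  Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0),
+                       ConstantInt::get(Type::Int32Ty, 1) };
+  return TD.getIndexedOffset(PointerType::getUnqual(STy), Indices, 2);
+}
+#endif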
+
+/// getPreferredAlignment - Return the preferred alignment of the specified
+/// global. This includes an explicitly requested alignment (if the global
+/// has one).
+unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const {
+ const Type *ElemType = GV->getType()->getElementType();
+ unsigned Alignment = getPrefTypeAlignment(ElemType);
+ if (GV->getAlignment() > Alignment)
+ Alignment = GV->getAlignment();
+
+ if (GV->hasInitializer()) {
+ if (Alignment < 16) {
+ // If the global is not external, see if it is large. If so, give it a
+ // larger alignment.
+ if (getTypeSizeInBits(ElemType) > 128)
+ Alignment = 16; // 16-byte alignment.
+ }
+ }
+ return Alignment;
+}
+
+/// getPreferredAlignmentLog - Return the preferred alignment of the
+/// specified global, returned in log form. This includes an explicitly
+/// requested alignment (if the global has one).
+unsigned TargetData::getPreferredAlignmentLog(const GlobalVariable *GV) const {
+ return Log2_32(getPreferredAlignment(GV));
+}
diff --git a/lib/Target/TargetFrameInfo.cpp b/lib/Target/TargetFrameInfo.cpp
new file mode 100644
index 000000000000..873d60a1b5ff
--- /dev/null
+++ b/lib/Target/TargetFrameInfo.cpp
@@ -0,0 +1,19 @@
+//===-- TargetFrameInfo.cpp - Implement machine frame interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetFrameInfo.h"
+#include <cstdlib>
+using namespace llvm;
+
+TargetFrameInfo::~TargetFrameInfo() {
+}
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
new file mode 100644
index 000000000000..ceaea0c2027c
--- /dev/null
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -0,0 +1,50 @@
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Constant.h"
+#include "llvm/DerivedTypes.h"
+using namespace llvm;
+
+TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc,
+ unsigned numOpcodes)
+ : Descriptors(Desc), NumOpcodes(numOpcodes) {
+}
+
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (TID.isBranch() && !TID.isBarrier())
+ return true;
+ if (!TID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+/// getInstrOperandRegClass - Return register class of the operand of an
+/// instruction of the specified TargetInstrDesc.
+const TargetRegisterClass*
+llvm::getInstrOperandRegClass(const TargetRegisterInfo *TRI,
+ const TargetInstrDesc &II, unsigned Op) {
+ if (Op >= II.getNumOperands())
+ return NULL;
+ if (II.OpInfo[Op].isLookupPtrRegClass())
+ return TRI->getPointerRegClass();
+ return TRI->getRegClass(II.OpInfo[Op].RegClass);
+}
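+
+// Usage sketch (illustrative; MI and TRI stand for any machine instruction and
+// register info): looking up the register class constraint of operand OpNo via
+// the instruction's descriptor.
+#if 0
+static const TargetRegisterClass *
+operandClassExample(const TargetRegisterInfo *TRI, const MachineInstr *MI,
+                    unsigned OpNo) {
+  return getInstrOperandRegClass(TRI, MI->getDesc(), OpNo);
+}
+#endif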
diff --git a/lib/Target/TargetIntrinsicInfo.cpp b/lib/Target/TargetIntrinsicInfo.cpp
new file mode 100644
index 000000000000..d8da08e4f1d3
--- /dev/null
+++ b/lib/Target/TargetIntrinsicInfo.cpp
@@ -0,0 +1,22 @@
+//===-- TargetIntrinsicInfo.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetIntrinsicInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+using namespace llvm;
+
+TargetIntrinsicInfo::TargetIntrinsicInfo(const char **desc, unsigned count)
+ : Intrinsics(desc), NumIntrinsics(count) {
+}
+
+TargetIntrinsicInfo::~TargetIntrinsicInfo() {
+}
diff --git a/lib/Target/TargetMachOWriterInfo.cpp b/lib/Target/TargetMachOWriterInfo.cpp
new file mode 100644
index 000000000000..d608119817d6
--- /dev/null
+++ b/lib/Target/TargetMachOWriterInfo.cpp
@@ -0,0 +1,25 @@
+//===-- TargetMachOWriterInfo.cpp - MachO Writer Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TargetMachOWriterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachOWriterInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+using namespace llvm;
+
+TargetMachOWriterInfo::~TargetMachOWriterInfo() {}
+
+MachineRelocation
+TargetMachOWriterInfo::GetJTRelocation(unsigned Offset,
+ MachineBasicBlock *MBB) const {
+ // FIXME: do something about PIC
+ return MachineRelocation::getBB(Offset, MachineRelocation::VANILLA, MBB);
+}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
new file mode 100644
index 000000000000..1b042ddef9b8
--- /dev/null
+++ b/lib/Target/TargetMachine.cpp
@@ -0,0 +1,229 @@
+//===-- TargetMachine.cpp - General Target Information ---------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// Command-line options that tend to be useful on more than one back-end.
+//
+
+namespace llvm {
+ bool LessPreciseFPMADOption;
+ bool PrintMachineCode;
+ bool NoFramePointerElim;
+ bool NoExcessFPPrecision;
+ bool UnsafeFPMath;
+ bool FiniteOnlyFPMathOption;
+ bool HonorSignDependentRoundingFPMathOption;
+ bool UseSoftFloat;
+ bool NoImplicitFloat;
+ bool NoZerosInBSS;
+ bool ExceptionHandling;
+ bool UnwindTablesMandatory;
+ Reloc::Model RelocationModel;
+ CodeModel::Model CMModel;
+ bool PerformTailCallOpt;
+ unsigned StackAlignment;
+ bool RealignStack;
+ bool DisableJumpTables;
+ bool StrongPHIElim;
+ bool DisableRedZone;
+ bool AsmVerbosityDefault(false);
+}
+
+static cl::opt<bool, true>
+PrintCode("print-machineinstrs",
+ cl::desc("Print generated machine code"),
+ cl::location(PrintMachineCode), cl::init(false));
+static cl::opt<bool, true>
+DisableFPElim("disable-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization"),
+ cl::location(NoFramePointerElim),
+ cl::init(false));
+static cl::opt<bool, true>
+DisableExcessPrecision("disable-excess-fp-precision",
+ cl::desc("Disable optimizations that may increase FP precision"),
+ cl::location(NoExcessFPPrecision),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableFPMAD("enable-fp-mad",
+ cl::desc("Enable less precise MAD instructions to be generated"),
+ cl::location(LessPreciseFPMADOption),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableUnsafeFPMath("enable-unsafe-fp-math",
+ cl::desc("Enable optimizations that may decrease FP precision"),
+ cl::location(UnsafeFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableFiniteOnlyFPMath("enable-finite-only-fp-math",
+ cl::desc("Enable optimizations that assumes non- NaNs / +-Infs"),
+ cl::location(FiniteOnlyFPMathOption),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
+ cl::Hidden,
+ cl::desc("Force codegen to assume rounding mode can change dynamically"),
+ cl::location(HonorSignDependentRoundingFPMathOption),
+ cl::init(false));
+static cl::opt<bool, true>
+GenerateSoftFloatCalls("soft-float",
+ cl::desc("Generate software floating point library calls"),
+ cl::location(UseSoftFloat),
+ cl::init(false));
+static cl::opt<bool, true>
+GenerateNoImplicitFloats("no-implicit-float",
+ cl::desc("Don't generate implicit floating point instructions (x86-only)"),
+ cl::location(NoImplicitFloat),
+ cl::init(false));
+static cl::opt<bool, true>
+DontPlaceZerosInBSS("nozero-initialized-in-bss",
+ cl::desc("Don't place zero-initialized symbols into bss section"),
+ cl::location(NoZerosInBSS),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableExceptionHandling("enable-eh",
+ cl::desc("Emit DWARF exception handling (default if target supports)"),
+ cl::location(ExceptionHandling),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableUnwindTables("unwind-tables",
+ cl::desc("Generate unwinding tables for all functions"),
+ cl::location(UnwindTablesMandatory),
+ cl::init(false));
+
+static cl::opt<llvm::Reloc::Model, true>
+DefRelocationModel("relocation-model",
+ cl::desc("Choose relocation model"),
+ cl::location(RelocationModel),
+ cl::init(Reloc::Default),
+ cl::values(
+ clEnumValN(Reloc::Default, "default",
+ "Target default relocation model"),
+ clEnumValN(Reloc::Static, "static",
+ "Non-relocatable code"),
+ clEnumValN(Reloc::PIC_, "pic",
+ "Fully relocatable, position independent code"),
+ clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
+ "Relocatable external references, non-relocatable code"),
+ clEnumValEnd));
+static cl::opt<llvm::CodeModel::Model, true>
+DefCodeModel("code-model",
+ cl::desc("Choose code model"),
+ cl::location(CMModel),
+ cl::init(CodeModel::Default),
+ cl::values(
+ clEnumValN(CodeModel::Default, "default",
+ "Target default code model"),
+ clEnumValN(CodeModel::Small, "small",
+ "Small code model"),
+ clEnumValN(CodeModel::Kernel, "kernel",
+ "Kernel code model"),
+ clEnumValN(CodeModel::Medium, "medium",
+ "Medium code model"),
+ clEnumValN(CodeModel::Large, "large",
+ "Large code model"),
+ clEnumValEnd));
+static cl::opt<bool, true>
+EnablePerformTailCallOpt("tailcallopt",
+ cl::desc("Turn on tail call optimization."),
+ cl::location(PerformTailCallOpt),
+ cl::init(false));
+static cl::opt<unsigned, true>
+OverrideStackAlignment("stack-alignment",
+ cl::desc("Override default stack alignment"),
+ cl::location(StackAlignment),
+ cl::init(0));
+static cl::opt<bool, true>
+EnableRealignStack("realign-stack",
+ cl::desc("Realign stack if needed"),
+ cl::location(RealignStack),
+ cl::init(true));
+static cl::opt<bool, true>
+DisableSwitchTables(cl::Hidden, "disable-jump-tables",
+ cl::desc("Do not generate jump tables."),
+ cl::location(DisableJumpTables),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
+ cl::desc("Use strong PHI elimination."),
+ cl::location(StrongPHIElim),
+ cl::init(false));
+static cl::opt<bool, true>
+DisableRedZoneOption("disable-red-zone",
+ cl::desc("Do not emit code that uses the red zone."),
+ cl::location(DisableRedZone),
+ cl::init(false));
+
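+// Example (illustrative): these flags appear on any tool that links in the
+// target library, e.g.
+//   llc -relocation-model=pic -code-model=small -disable-fp-elim foo.bc
+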
+//---------------------------------------------------------------------------
+// TargetMachine Class
+//
+
+TargetMachine::~TargetMachine() {
+ delete AsmInfo;
+}
+
+/// getRelocationModel - Returns the code generation relocation model. The
+/// choices are static, PIC, dynamic-no-pic, and target default.
+Reloc::Model TargetMachine::getRelocationModel() {
+ return RelocationModel;
+}
+
+/// setRelocationModel - Sets the code generation relocation model.
+void TargetMachine::setRelocationModel(Reloc::Model Model) {
+ RelocationModel = Model;
+}
+
+/// getCodeModel - Returns the code model. The choices are small, kernel,
+/// medium, large, and target default.
+CodeModel::Model TargetMachine::getCodeModel() {
+ return CMModel;
+}
+
+/// setCodeModel - Sets the code model.
+void TargetMachine::setCodeModel(CodeModel::Model Model) {
+ CMModel = Model;
+}
+
+bool TargetMachine::getAsmVerbosityDefault() {
+ return AsmVerbosityDefault;
+}
+
+void TargetMachine::setAsmVerbosityDefault(bool V) {
+ AsmVerbosityDefault = V;
+}
+
+namespace llvm {
+  /// LessPreciseFPMAD - This flag returns true when the -enable-fp-mad option
+  /// is specified on the command line. When this flag is off (default), the
+  /// code generator is not allowed to generate mad (multiply-add) if the
+  /// result is "less precise" than doing those operations individually.
+ bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; }
+
+ /// FiniteOnlyFPMath - This returns true when the -enable-finite-only-fp-math
+ /// option is specified on the command line. If this returns false (default),
+ /// the code generator is not allowed to assume that FP arithmetic arguments
+ /// and results are never NaNs or +-Infs.
+ bool FiniteOnlyFPMath() { return UnsafeFPMath || FiniteOnlyFPMathOption; }
+
+ /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+ /// that the rounding mode of the FPU can change from its default.
+ bool HonorSignDependentRoundingFPMath() {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+ }
+}
+
diff --git a/lib/Target/TargetMachineRegistry.cpp b/lib/Target/TargetMachineRegistry.cpp
new file mode 100644
index 000000000000..c1a4777c6314
--- /dev/null
+++ b/lib/Target/TargetMachineRegistry.cpp
@@ -0,0 +1,78 @@
+//===-- TargetMachineRegistry.cpp - Target Auto Registration Impl ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes the RegisterTarget class, which TargetMachine
+// implementations should use to register themselves with the system. This file
+// also exposes the TargetMachineRegistry class, which allows tools to inspect
+// all of the registered targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachineRegistry.h"
+#include <algorithm>
+using namespace llvm;
+
+/// getClosestStaticTargetForModule - Given an LLVM module, pick the best target
+/// that is compatible with the module. If no close target can be found, this
+/// returns null and sets the Error string to a reason.
+const TargetMachineRegistry::entry *
+TargetMachineRegistry::getClosestStaticTargetForModule(const Module &M,
+ std::string &Error) {
+ std::vector<std::pair<unsigned, const entry *> > UsableTargets;
+ for (Registry<TargetMachine>::iterator I = begin(), E = end(); I != E; ++I)
+ if (unsigned Qual = I->ModuleMatchQualityFn(M))
+ UsableTargets.push_back(std::make_pair(Qual, &*I));
+
+ if (UsableTargets.empty()) {
+ Error = "No available targets are compatible with this module";
+ return 0;
+ } else if (UsableTargets.size() == 1)
+ return UsableTargets.back().second;
+
+ // Otherwise, take the best target, but make sure we don't have two equally
+ // good best targets.
+ std::sort(UsableTargets.begin(), UsableTargets.end());
+  if (UsableTargets.back().first ==
+      UsableTargets[UsableTargets.size()-2].first) {
+ Error = "Cannot choose between targets \"" +
+ std::string(UsableTargets.back().second->Name) + "\" and \"" +
+ std::string(UsableTargets[UsableTargets.size()-2].second->Name) + "\"";
+ return 0;
+ }
+ return UsableTargets.back().second;
+}
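+
+// Usage sketch (illustrative; assumes the static interface declared in
+// TargetMachineRegistry.h): how a tool picks a code generator for a module and
+// reports failure.
+#if 0
+static const TargetMachineRegistry::entry *
+pickTargetExample(const Module &M, std::string &Err) {
+  const TargetMachineRegistry::entry *E =
+      TargetMachineRegistry::getClosestStaticTargetForModule(M, Err);
+  return E;  // null on failure, with Err explaining why
+}
+#endif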
+
+/// getClosestTargetForJIT - Pick the best target that is compatible with
+/// the current host. If no close target can be found, this returns null
+/// and sets the Error string to a reason.
+const TargetMachineRegistry::entry *
+TargetMachineRegistry::getClosestTargetForJIT(std::string &Error) {
+ std::vector<std::pair<unsigned, const entry *> > UsableTargets;
+ for (Registry<TargetMachine>::iterator I = begin(), E = end(); I != E; ++I)
+ if (unsigned Qual = I->JITMatchQualityFn())
+ UsableTargets.push_back(std::make_pair(Qual, &*I));
+
+ if (UsableTargets.empty()) {
+ Error = "No JIT is available for this host";
+ return 0;
+ } else if (UsableTargets.size() == 1)
+ return UsableTargets.back().second;
+
+ // Otherwise, take the best target. If there is a tie, just pick one.
+ unsigned MaxQual = UsableTargets.front().first;
+ const entry *MaxQualTarget = UsableTargets.front().second;
+
+ for (unsigned i = 1, e = UsableTargets.size(); i != e; ++i)
+ if (UsableTargets[i].first > MaxQual) {
+ MaxQual = UsableTargets[i].first;
+ MaxQualTarget = UsableTargets[i].second;
+ }
+
+ return MaxQualTarget;
+}
+
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
new file mode 100644
index 000000000000..a84fdaa4a802
--- /dev/null
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -0,0 +1,144 @@
+//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetRegisterInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/BitVector.h"
+
+using namespace llvm;
+
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
+ regclass_iterator RCB, regclass_iterator RCE,
+ int CFSO, int CFDO,
+ const unsigned* subregs, const unsigned subregsize,
+ const unsigned* superregs, const unsigned superregsize,
+ const unsigned* aliases, const unsigned aliasessize)
+ : SubregHash(subregs), SubregHashSize(subregsize),
+ SuperregHash(superregs), SuperregHashSize(superregsize),
+ AliasesHash(aliases), AliasesHashSize(aliasessize),
+ Desc(D), NumRegs(NR), RegClassBegin(RCB), RegClassEnd(RCE) {
+ assert(NumRegs < FirstVirtualRegister &&
+ "Target has too many physical registers!");
+
+ CallFrameSetupOpcode = CFSO;
+ CallFrameDestroyOpcode = CFDO;
+}
+
+TargetRegisterInfo::~TargetRegisterInfo() {}
+
+/// getPhysicalRegisterRegClass - Returns the Register Class of a physical
+/// register of the given type. If type is MVT::Other, then just return any
+/// register class the register belongs to.
+const TargetRegisterClass *
+TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, MVT VT) const {
+ assert(isPhysicalRegister(reg) && "reg must be a physical register");
+
+  // Pick the largest (most general) register class of the right type that
+  // contains this physreg.
+ const TargetRegisterClass* BestRC = 0;
+ for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
+ const TargetRegisterClass* RC = *I;
+ if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
+ (!BestRC || BestRC->hasSuperClass(RC)))
+ BestRC = RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
+/// getAllocatableSetForRC - Toggle the bits that represent allocatable
+/// registers for the specific register class.
+static void getAllocatableSetForRC(MachineFunction &MF,
+ const TargetRegisterClass *RC, BitVector &R){
+ for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+ E = RC->allocation_order_end(MF); I != E; ++I)
+ R.set(*I);
+}
+
+BitVector TargetRegisterInfo::getAllocatableSet(MachineFunction &MF,
+ const TargetRegisterClass *RC) const {
+ BitVector Allocatable(NumRegs);
+ if (RC) {
+ getAllocatableSetForRC(MF, RC, Allocatable);
+ return Allocatable;
+ }
+
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ getAllocatableSetForRC(MF, *I, Allocatable);
+ return Allocatable;
+}
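+
+// Usage sketch (illustrative): counting the physical registers the allocator
+// may use for a register class in the current function.
+#if 0
+static unsigned countAllocatableExample(const TargetRegisterInfo &TRI,
+                                        MachineFunction &MF,
+                                        const TargetRegisterClass *RC) {
+  BitVector Allocatable = TRI.getAllocatableSet(MF, RC);
+  return Allocatable.count();
+}
+#endif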
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index. This is the default implementation
+/// which is likely incorrect for the target.
+int TargetRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
+ const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ TFI.getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
+
+/// getInitialFrameState - Returns a list of machine moves that are assumed
+/// on entry to a function.
+void
+TargetRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+ // Default is to do nothing.
+}
+
+const TargetRegisterClass *
+llvm::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B) {
+ // First take care of the trivial cases
+ if (A == B)
+ return A;
+ if (!A || !B)
+ return 0;
+
+ // If B is a subclass of A, it will be handled in the loop below
+ if (B->hasSubClass(A))
+ return A;
+
+ const TargetRegisterClass *Best = 0;
+ for (TargetRegisterClass::sc_iterator I = A->subclasses_begin();
+ const TargetRegisterClass *X = *I; ++I) {
+ if (X == B)
+ return B; // B is a subclass of A
+
+ // X must be a common subclass of A and B
+ if (!B->hasSubClass(X))
+ continue;
+
+ // A superclass is definitely better.
+ if (!Best || Best->hasSuperClass(X)) {
+ Best = X;
+ continue;
+ }
+
+ // A subclass is definitely worse
+ if (Best->hasSubClass(X))
+ continue;
+
+    // Best and X have no super/sub class relation - pick the larger class, or
+    // the smaller spill size.
+ int nb = std::distance(Best->begin(), Best->end());
+ int ni = std::distance(X->begin(), X->end());
+ if (ni>nb || (ni==nb && X->getSize() < Best->getSize()))
+ Best = X;
+ }
+ return Best;
+}
diff --git a/lib/Target/TargetSubtarget.cpp b/lib/Target/TargetSubtarget.cpp
new file mode 100644
index 000000000000..95c92cabaf70
--- /dev/null
+++ b/lib/Target/TargetSubtarget.cpp
@@ -0,0 +1,22 @@
+//===-- TargetSubtarget.cpp - General Target Information -------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the general parts of a Subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSubtarget.h"
+using namespace llvm;
+
+//---------------------------------------------------------------------------
+// TargetSubtarget Class
+//
+TargetSubtarget::TargetSubtarget() {}
+
+TargetSubtarget::~TargetSubtarget() {}
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..dbd03d8b9633
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,11 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_partially_linked_object(LLVMX86AsmPrinter
+ X86ATTAsmPrinter.cpp
+ X86AsmPrinter.cpp
+ X86IntelAsmPrinter.cpp
+ )
+
+target_name_of_partially_linked_object(LLVMX86CodeGen n)
+
+add_dependencies(LLVMX86AsmPrinter ${n})
diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/AsmPrinter/Makefile
new file mode 100644
index 000000000000..ba89ac69bf68
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86AsmPrinter
+
+# Hack: we need to include 'main' x86 target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
new file mode 100644
index 000000000000..8afe2ea9e10b
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
@@ -0,0 +1,1075 @@
+//===-- X86ATTAsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to AT&T format assembly
+// language. This printer is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86ATTAsmPrinter.h"
+#include "X86.h"
+#include "X86COFF.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "X86TargetAsmInfo.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+static std::string getPICLabelString(unsigned FnNum,
+ const TargetAsmInfo *TAI,
+ const X86Subtarget* Subtarget) {
+ std::string label;
+ if (Subtarget->isTargetDarwin())
+ label = "\"L" + utostr_32(FnNum) + "$pb\"";
+ else if (Subtarget->isTargetELF())
+ label = ".Lllvm$" + utostr_32(FnNum) + "." "$piclabel";
+ else
+ assert(0 && "Don't know how to print PIC label!\n");
+
+ return label;
+}
+
+static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
+ const TargetData *TD) {
+ X86MachineFunctionInfo Info;
+ uint64_t Size = 0;
+
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Info.setDecorationStyle(StdCall);
+ break;
+ case CallingConv::X86_FastCall:
+ Info.setDecorationStyle(FastCall);
+ break;
+ default:
+ return Info;
+ }
+
+ unsigned argNum = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI, ++argNum) {
+ const Type* Ty = AI->getType();
+
+ // 'Dereference' type in case of byval parameter attribute
+ if (F->paramHasAttr(argNum, Attribute::ByVal))
+ Ty = cast<PointerType>(Ty)->getElementType();
+
+ // Size should be aligned to DWORD boundary
+ Size += ((TD->getTypeAllocSize(Ty) + 3)/4)*4;
+ }
+
+ // We're not supporting tooooo huge arguments :)
+ Info.setBytesToPopOnReturn((unsigned int)Size);
+ return Info;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \\n or \\0.
+static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+ for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+ Name != E; ++Name)
+ if (isprint(*Name))
+ OS << *Name;
+}
+
+/// decorateName - Query FunctionInfoMap and use this information for various
+/// name decoration.
+void X86ATTAsmPrinter::decorateName(std::string &Name,
+ const GlobalValue *GV) {
+ const Function *F = dyn_cast<Function>(GV);
+ if (!F) return;
+
+ // We don't want to decorate non-stdcall or non-fastcall functions right now
+ unsigned CC = F->getCallingConv();
+ if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall)
+ return;
+
+ // Decorate names only when we're targeting Cygwin/Mingw32 targets
+ if (!Subtarget->isTargetCygMing())
+ return;
+
+ FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F);
+
+ const X86MachineFunctionInfo *Info;
+ if (info_item == FunctionInfoMap.end()) {
+ // Calculate the appropriate function info and populate the map
+ FunctionInfoMap[F] = calculateFunctionInfo(F, TM.getTargetData());
+ Info = &FunctionInfoMap[F];
+ } else {
+ Info = &info_item->second;
+ }
+
+ const FunctionType *FT = F->getFunctionType();
+ switch (Info->getDecorationStyle()) {
+ case None:
+ break;
+ case StdCall:
+ // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+ Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+ break;
+ case FastCall:
+ // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+ Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+
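+ // Fastcall names are prefixed with '@': replace the usual leading '_', or
+ // prepend '@' if there is no underscore prefix.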
+ if (Name[0] == '_') {
+ Name[0] = '@';
+ } else {
+ Name = '@' + Name;
+ }
+ break;
+ default:
+ assert(0 && "Unsupported DecorationStyle");
+ }
+}
+
+void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+
+ decorateName(CurrentFnName, F);
+
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ unsigned FnAlign = 4;
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ FnAlign = 1;
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ case Function::PrivateLinkage:
+ EmitAlignment(FnAlign, F);
+ break;
+ case Function::DLLExportLinkage:
+ case Function::ExternalLinkage:
+ EmitAlignment(FnAlign, F);
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ break;
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ EmitAlignment(FnAlign, F);
+ if (Subtarget->isTargetDarwin()) {
+ O << "\t.globl\t" << CurrentFnName << '\n';
+ O << TAI->getWeakDefDirective() << CurrentFnName << '\n';
+ } else if (Subtarget->isTargetCygMing()) {
+ O << "\t.globl\t" << CurrentFnName << "\n"
+ "\t.linkonce discard\n";
+ } else {
+ O << "\t.weak\t" << CurrentFnName << '\n';
+ }
+ break;
+ }
+
+ printVisibility(CurrentFnName, F->getVisibility());
+
+ if (Subtarget->isTargetELF())
+ O << "\t.type\t" << CurrentFnName << ",@function\n";
+ else if (Subtarget->isTargetCygMing()) {
+ O << "\t.def\t " << CurrentFnName
+ << ";\t.scl\t" <<
+ (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT)
+ << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
+ << ";\t.endef\n";
+ }
+
+ O << CurrentFnName << ":\n";
+ // Add a workaround for linkonce linkage on Cygwin/MinGW
+ if (Subtarget->isTargetCygMing() &&
+ (F->hasLinkOnceLinkage() || F->hasWeakLinkage()))
+ O << "Lllvm$workaround$fake$stub$" << CurrentFnName << ":\n";
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool X86ATTAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ this->MF = &MF;
+ unsigned CC = F->getCallingConv();
+
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Populate the function information map. We only want to record
+ // stdcall and fastcall functions' information right now.
+ if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall)
+ FunctionInfoMap[F] = *MF.getInfo<X86MachineFunctionInfo>();
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ if (F->hasDLLExportLinkage())
+ DLLExportedFns.insert(Mang->makeNameProper(F->getName(), ""));
+
+ // Print the 'header' of function
+ emitFunctionHeader(MF);
+
+ // Emit pre-function debug and/or EH information.
+ if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+ DW->BeginFunction(&MF);
+
+ // Print out code for the function.
+ bool hasAnyRealCode = false;
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block.
+ if (!VerboseAsm && (I->pred_empty() || I->isOnlyReachableByFallthrough())) {
+ // This is an entry block or a block that's only reachable via a
+ // fallthrough edge. In non-VerboseAsm mode, don't print the label.
+ } else {
+ printBasicBlockLabel(I, true, true, VerboseAsm);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ // Print the assembly for the instruction.
+ if (!II->isLabel())
+ hasAnyRealCode = true;
+ printMachineInstruction(II);
+ }
+ }
+
+ if (Subtarget->isTargetDarwin() && !hasAnyRealCode) {
+ // If the function is empty, then we need to emit *something*. Otherwise,
+ // the function's label might be associated with something that it wasn't
+ // meant to be associated with. We emit a noop in this situation.
+ // We are assuming inline asms are code.
+ O << "\tnop\n";
+ }
+
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
+
+ // Emit post-function debug information.
+ if (TAI->doesSupportDebugInformation())
+ DW->EndFunction(&MF);
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ O.flush();
+
+ // We didn't modify anything.
+ return false;
+}
+
+static inline bool shouldPrintGOT(TargetMachine &TM, const X86Subtarget* ST) {
+ return ST->isPICStyleGOT() && TM.getRelocationModel() == Reloc::PIC_;
+}
+
+static inline bool shouldPrintPLT(TargetMachine &TM, const X86Subtarget* ST) {
+ return ST->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_ &&
+ (ST->isPICStyleRIPRel() || ST->isPICStyleGOT());
+}
+
+static inline bool shouldPrintStub(TargetMachine &TM, const X86Subtarget* ST) {
+ return ST->isPICStyleStub() && TM.getRelocationModel() != Reloc::Static;
+}
+
+void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier, bool NotRIPRel) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Virtual registers should not make it this far!");
+ O << '%';
+ unsigned Reg = MO.getReg();
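+ // A "subregNN" modifier (e.g. "subreg64") requests the sub- or
+ // super-register of width NN; anything else defaults to the 8-bit register.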
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ MVT VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+ O << TRI->getAsmName(Reg);
+ return;
+ }
+
+ case MachineOperand::MO_Immediate:
+ if (!Modifier || (strcmp(Modifier, "debug") &&
+ strcmp(Modifier, "mem") &&
+ strcmp(Modifier, "call")))
+ O << '$';
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB(), false, false, VerboseAsm);
+ return;
+ case MachineOperand::MO_JumpTableIndex: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ if (!isMemOp) O << '$';
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (Subtarget->isPICStyleStub())
+ O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << "$pb\"";
+ else if (Subtarget->isPICStyleGOT())
+ O << "@GOTOFF";
+ }
+
+ if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel)
+ O << "(%rip)";
+ return;
+ }
+ case MachineOperand::MO_ConstantPoolIndex: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ if (!isMemOp) O << '$';
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << MO.getIndex();
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (Subtarget->isPICStyleStub())
+ O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << "$pb\"";
+ else if (Subtarget->isPICStyleGOT())
+ O << "@GOTOFF";
+ }
+
+ printOffset(MO.getOffset());
+
+ if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel)
+ O << "(%rip)";
+ return;
+ }
+ case MachineOperand::MO_GlobalAddress: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ bool needCloseParen = false;
+
+ const GlobalValue *GV = MO.getGlobal();
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee for determining
+ // thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ }
+
+ bool isThreadLocal = GVar && GVar->isThreadLocal();
+
+ std::string Name = Mang->getValueName(GV);
+ decorateName(Name, GV);
+
+ if (!isMemOp && !isCallOp)
+ O << '$';
+ else if (Name[0] == '$') {
+ // The name begins with a dollar-sign. In order to avoid having it look
+ // like an integer immediate to the assembler, enclose it in parens.
+ O << '(';
+ needCloseParen = true;
+ }
+
+ if (shouldPrintStub(TM, Subtarget)) {
+ // Link-once, declaration, or Weakly-linked global variables need
+ // non-lazily-resolved stubs
+ if (GV->isDeclaration() || GV->isWeakForLinker()) {
+ // Dynamically-resolved functions need a stub for the function.
+ if (isCallOp && isa<Function>(GV)) {
+ // Function stubs are no longer needed for Mac OS X 10.5 and up.
+ if (Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9) {
+ O << Name;
+ } else {
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ }
+ } else if (GV->hasHiddenVisibility()) {
+ if (!GV->isDeclaration() && !GV->hasCommonLinkage())
+ // There is a definition in the current translation unit, so the symbol
+ // can be referenced directly.
+ O << Name;
+ else {
+ HiddenGVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ }
+ } else {
+ GVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ }
+ } else {
+ if (GV->hasDLLImportLinkage())
+ O << "__imp_";
+ O << Name;
+ }
+
+ if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_)
+ O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget);
+ } else {
+ if (GV->hasDLLImportLinkage()) {
+ O << "__imp_";
+ }
+ O << Name;
+
+ if (isCallOp) {
+ if (shouldPrintPLT(TM, Subtarget)) {
+ // Assemble call via PLT for externally visible symbols
+ if (!GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
+ !GV->hasLocalLinkage())
+ O << "@PLT";
+ }
+ if (Subtarget->isTargetCygMing() && GV->isDeclaration())
+ // Save function name for later type emission
+ FnStubs.insert(Name);
+ }
+ }
+
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ printOffset(MO.getOffset());
+
+ if (isThreadLocal) {
+ TLSModel::Model model = getTLSModel(GVar, TM.getRelocationModel());
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ O << "@TLSGD";
+ break;
+ case TLSModel::LocalDynamic:
+ // O << "@TLSLD"; // local dynamic not implemented
+ O << "@TLSGD";
+ break;
+ case TLSModel::InitialExec:
+ if (Subtarget->is64Bit()) {
+ assert (!NotRIPRel);
+ O << "@GOTTPOFF(%rip)";
+ } else {
+ O << "@INDNTPOFF";
+ }
+ break;
+ case TLSModel::LocalExec:
+ if (Subtarget->is64Bit())
+ O << "@TPOFF";
+ else
+ O << "@NTPOFF";
+ break;
+ default:
+ assert (0 && "Unknown TLS model");
+ }
+ } else if (isMemOp) {
+ if (shouldPrintGOT(TM, Subtarget)) {
+ if (Subtarget->GVRequiresExtraLoad(GV, TM, false))
+ O << "@GOT";
+ else
+ O << "@GOTOFF";
+ } else if (Subtarget->isPICStyleRIPRel() && !NotRIPRel) {
+ if (TM.getRelocationModel() != Reloc::Static) {
+ if (Subtarget->GVRequiresExtraLoad(GV, TM, false))
+ O << "@GOTPCREL";
+
+ if (needCloseParen) {
+ needCloseParen = false;
+ O << ')';
+ }
+ }
+
+ // Use rip when possible to reduce code size, except when
+ // index or base register are also part of the address. e.g.
+ // foo(%rip)(%rcx,%rax,4) is not legal
+ O << "(%rip)";
+ }
+ }
+
+ if (needCloseParen)
+ O << ')';
+
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ bool needCloseParen = false;
+ std::string Name(TAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ // Print function stub suffix unless it's Mac OS X 10.5 and up.
+ if (isCallOp && shouldPrintStub(TM, Subtarget) &&
+ !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) {
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ return;
+ }
+ if (!isMemOp && !isCallOp)
+ O << '$';
+ else if (Name[0] == '$') {
+ // The name begins with a dollar-sign. In order to avoid having it look
+ // like an integer immediate to the assembler, enclose it in parens.
+ O << '(';
+ needCloseParen = true;
+ }
+
+ O << Name;
+
+ if (shouldPrintPLT(TM, Subtarget)) {
+ std::string GOTName(TAI->getGlobalPrefix());
+ GOTName+="_GLOBAL_OFFSET_TABLE_";
+ if (Name == GOTName)
+ // HACK! Emit extra offset to PC during printing GOT offset to
+ // compensate for the size of popl instruction. The resulting code
+ // should look like:
+ // call .piclabel
+ // piclabel:
+ // popl %some_register
+ // addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register
+ O << " + [.-"
+ << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']';
+
+ if (isCallOp)
+ O << "@PLT";
+ }
+
+ if (needCloseParen)
+ O << ')';
+
+ if (!isCallOp && Subtarget->isPICStyleRIPRel())
+ O << "(%rip)";
+
+ return;
+ }
+ default:
+ O << "<unknown operand type>"; return;
+ }
+}
+
+void X86ATTAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
+ unsigned char value = MI->getOperand(Op).getImm();
+ assert(value <= 7 && "Invalid ssecc argument!");
+ switch (value) {
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+void X86ATTAsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier,
+ bool NotRIPRel) {
+ MachineOperand BaseReg = MI->getOperand(Op);
+ MachineOperand IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
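+ // RIP-relative addressing cannot be combined with a base or index register,
+ // so the presence of either forces NotRIPRel.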
+ NotRIPRel |= IndexReg.getReg() || BaseReg.getReg();
+ if (DispSpec.isGlobal() ||
+ DispSpec.isCPI() ||
+ DispSpec.isJTI() ||
+ DispSpec.isSymbol()) {
+ printOperand(MI, Op+3, "mem", NotRIPRel);
+ } else {
+ int DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+ O << DispVal;
+ }
+
+ if (IndexReg.getReg() || BaseReg.getReg()) {
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ unsigned BaseRegOperand = 0, IndexRegOperand = 2;
+
+ // There are cases where we can end up with ESP/RSP in the indexreg slot.
+ // If this happens, swap the base/index register to support assemblers that
+ // don't work when the index is *SP.
+ if (IndexReg.getReg() == X86::ESP || IndexReg.getReg() == X86::RSP) {
+ assert(ScaleVal == 1 && "Scale not supported for stack pointer!");
+ std::swap(BaseReg, IndexReg);
+ std::swap(BaseRegOperand, IndexRegOperand);
+ }
+
+ O << '(';
+ if (BaseReg.getReg())
+ printOperand(MI, Op+BaseRegOperand, Modifier);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+IndexRegOperand, Modifier);
+ if (ScaleVal != 1)
+ O << ',' << ScaleVal;
+ }
+ O << ')';
+ }
+}
+
+void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier, bool NotRIPRel){
+ assert(isMem(MI, Op) && "Invalid memory reference!");
+ MachineOperand Segment = MI->getOperand(Op+4);
+ if (Segment.getReg()) {
+ printOperand(MI, Op+4, Modifier);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op, Modifier, NotRIPRel);
+}
+
+void X86ATTAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const {
+ if (!TAI->getSetDirective())
+ return;
+
+ // We don't need .set machinery if we have GOT-style relocations
+ if (Subtarget->isPICStyleGOT())
+ return;
+
+ O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
+ printBasicBlockLabel(MBB, false, false, false);
+ if (Subtarget->isPICStyleRIPRel())
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << uid << '\n';
+ else
+ O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << '\n';
+}
+
+void X86ATTAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
+ std::string label = getPICLabelString(getFunctionNumber(), TAI, Subtarget);
+ O << label << '\n' << label << ':';
+}
+
+
+void X86ATTAsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid) const
+{
+ const char *JTEntryDirective = MJTI->getEntrySize() == 4 ?
+ TAI->getData32bitsDirective() : TAI->getData64bitsDirective();
+
+ O << JTEntryDirective << ' ';
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if (Subtarget->isPICStyleRIPRel() || Subtarget->isPICStyleStub()) {
+ O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << '_' << uid << "_set_" << MBB->getNumber();
+ } else if (Subtarget->isPICStyleGOT()) {
+ printBasicBlockLabel(MBB, false, false, false);
+ O << "@GOTOFF";
+ } else
+ assert(0 && "Don't know how to print MBB label for this PIC mode");
+ } else
+ printBasicBlockLabel(MBB, false, false, false);
+}
+
+bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO,
+ const char Mode) {
+ unsigned Reg = MO.getReg();
+ switch (Mode) {
+ default: return true; // Unknown mode.
+ case 'b': // Print QImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ break;
+ case 'h': // Print QImode high register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ break;
+ case 'w': // Print HImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ break;
+ case 'k': // Print SImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ break;
+ case 'q': // Print DImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i64);
+ break;
+ }
+
+ O << '%'<< TRI->getAsmName(Reg);
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool X86ATTAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ printOperand(MI, OpNo, "mem", /*NotRIPRel=*/true);
+ return false;
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ case 'q': // Print DImode register
+ if (MI->getOperand(OpNo).isReg())
+ return printAsmMRegister(MI->getOperand(OpNo), ExtraCode[0]);
+ printOperand(MI, OpNo);
+ return false;
+
+ case 'P': // Don't print @PLT, but do print as memory.
+ printOperand(MI, OpNo, "mem", /*NotRIPRel=*/true);
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ case 'q': // Print DImode register
+ // These only apply to registers, ignore on mem.
+ break;
+ case 'P': // Don't print @PLT, but do print as memory.
+ printMemReference(MI, OpNo, "mem", /*NotRIPRel=*/true);
+ return false;
+ }
+ }
+ printMemReference(MI, OpNo);
+ return false;
+}
+
+/// printMachineInstruction -- Print out a single X86 LLVM instruction MI in
+/// AT&T syntax to the current output stream.
+///
+void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+}
+
+/// doInitialization
+bool X86ATTAsmPrinter::doInitialization(Module &M) {
+
+ bool Result = AsmPrinter::doInitialization(M);
+
+ if (TAI->doesSupportDebugInformation()) {
+ // Let PassManager know we need debug information and relay
+ // the MachineModuleInfo address on to DwarfWriter.
+ // AsmPrinter::doInitialization did this analysis.
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ DW->BeginModule(&M, MMI, O, this, TAI);
+ }
+
+ // Darwin wants symbols to be quoted if they have complex names.
+ if (Subtarget->isTargetDarwin())
+ Mang->setUseQuotes(true);
+
+ return Result;
+}
+
+
+void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
+ const TargetData *TD = TM.getTargetData();
+
+ if (!GVar->hasInitializer())
+ return; // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar)) {
+ if (Subtarget->isTargetDarwin() &&
+ TM.getRelocationModel() == Reloc::Static) {
+ if (GVar->getName() == "llvm.global_ctors")
+ O << ".reference .constructors_used\n";
+ else if (GVar->getName() == "llvm.global_dtors")
+ O << ".reference .destructors_used\n";
+ }
+ return;
+ }
+
+ std::string name = Mang->getValueName(GVar);
+ Constant *C = GVar->getInitializer();
+ const Type *Type = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Type);
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ if (Subtarget->isTargetELF())
+ O << "\t.type\t" << name << ",@object\n";
+
+ SwitchToSection(TAI->SectionForGlobal(GVar));
+
+ if (C->isNullValue() && !GVar->hasSection() &&
+ !(Subtarget->isTargetDarwin() &&
+ TAI->SectionKindForGlobal(GVar) == SectionKind::RODataMergeStr)) {
+ // FIXME: This seems to be pretty darwin-specific
+ if (GVar->hasExternalLinkage()) {
+ if (const char *Directive = TAI->getZeroFillDirective()) {
+ O << "\t.globl " << name << '\n';
+ O << Directive << "__DATA, __common, " << name << ", "
+ << Size << ", " << Align << '\n';
+ return;
+ }
+ }
+
+ if (!GVar->isThreadLocal() &&
+ (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (TAI->getLCOMMDirective() != NULL) {
+ if (GVar->hasLocalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << ',' << Size;
+ if (Subtarget->isTargetDarwin())
+ O << ',' << Align;
+ } else if (Subtarget->isTargetDarwin() && !GVar->hasCommonLinkage()) {
+ O << "\t.globl " << name << '\n'
+ << TAI->getWeakDefDirective() << name << '\n';
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O << "\t\t\t\t" << TAI->getCommentString() << ' ';
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << '\n';
+ EmitGlobalConstant(C);
+ return;
+ } else {
+ O << TAI->getCOMMDirective() << name << ',' << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ } else {
+ if (!Subtarget->isTargetCygMing()) {
+ if (GVar->hasLocalLinkage())
+ O << "\t.local\t" << name << '\n';
+ }
+ O << TAI->getCOMMDirective() << name << ',' << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ if (VerboseAsm) {
+ O << "\t\t" << TAI->getCommentString() << ' ';
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << '\n';
+ return;
+ }
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ if (Subtarget->isTargetDarwin()) {
+ O << "\t.globl " << name << '\n'
+ << TAI->getWeakDefDirective() << name << '\n';
+ } else if (Subtarget->isTargetCygMing()) {
+ O << "\t.globl\t" << name << "\n"
+ "\t.linkonce same_size\n";
+ } else {
+ O << "\t.weak\t" << name << '\n';
+ }
+ break;
+ case GlobalValue::DLLExportLinkage:
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.globl " << name << '\n';
+ // FALL THROUGH
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm){
+ O << "\t\t\t\t" << TAI->getCommentString() << ' ';
+ PrintUnmangledNameSafely(GVar, O);
+ }
+ O << '\n';
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << name << ", " << Size << '\n';
+
+ EmitGlobalConstant(C);
+}
+
+/// printGVStub - Print stub for a global value.
+///
+void X86ATTAsmPrinter::printGVStub(const char *GV, const char *Prefix) {
+ printSuffixedName(GV, "$non_lazy_ptr", Prefix);
+ O << ":\n\t.indirect_symbol ";
+ if (Prefix) O << Prefix;
+ O << GV << "\n\t.long\t0\n";
+}
+
+/// printHiddenGVStub - Print stub for a hidden global value.
+///
+void X86ATTAsmPrinter::printHiddenGVStub(const char *GV, const char *Prefix) {
+ EmitAlignment(2);
+ printSuffixedName(GV, "$non_lazy_ptr", Prefix);
+ if (Prefix) O << Prefix;
+ O << ":\n" << TAI->getData32bitsDirective() << GV << '\n';
+}
+
+
+bool X86ATTAsmPrinter::doFinalization(Module &M) {
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ printModuleLevelGV(I);
+
+ if (I->hasDLLExportLinkage())
+ DLLExportedGVs.insert(Mang->makeNameProper(I->getName(),""));
+
+ // If the global is an extern weak symbol, remember to emit the weak
+ // reference!
+ // FIXME: This is rather hacky, since we'll emit references to ALL extern
+ // weak symbols, even unused ones. But currently it's the only way to deal
+ // with extern weak initializers hidden deep inside constant expressions.
+ if (I->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(I);
+ }
+
+ for (Module::const_iterator I = M.begin(), E = M.end();
+ I != E; ++I) {
+ // If the global is an extern weak symbol, remember to emit the weak
+ // reference!
+ // FIXME: This is rather hacky, since we'll emit references to ALL extern
+ // weak symbols, even unused ones. But currently it's the only way to deal
+ // with extern weak initializers hidden deep inside constant expressions.
+ if (I->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(I);
+ }
+
+ // Output linker support code for dllexported globals
+ if (!DLLExportedGVs.empty())
+ SwitchToDataSection(".section .drectve");
+
+ for (StringSet<>::iterator i = DLLExportedGVs.begin(),
+ e = DLLExportedGVs.end();
+ i != e; ++i)
+ O << "\t.ascii \" -export:" << i->getKeyData() << ",data\"\n";
+
+ if (!DLLExportedFns.empty()) {
+ SwitchToDataSection(".section .drectve");
+ }
+
+ for (StringSet<>::iterator i = DLLExportedFns.begin(),
+ e = DLLExportedFns.end();
+ i != e; ++i)
+ O << "\t.ascii \" -export:" << i->getKeyData() << "\"\n";
+
+ if (Subtarget->isTargetDarwin()) {
+ SwitchToDataSection("");
+
+ // Output stubs for dynamically-linked functions
+ for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i) {
+ SwitchToDataSection("\t.section __IMPORT,__jump_table,symbol_stubs,"
+ "self_modifying_code+pure_instructions,5", 0);
+ const char *p = i->getKeyData();
+ printSuffixedName(p, "$stub");
+ O << ":\n"
+ "\t.indirect_symbol " << p << "\n"
+ "\thlt ; hlt ; hlt ; hlt ; hlt\n";
+ }
+
+ O << '\n';
+
+ // Print global value stubs.
+ bool InStubSection = false;
+ if (TAI->doesSupportExceptionHandling() && MMI && !Subtarget->is64Bit()) {
+ // Add the (possibly multiple) personalities to the set of global values.
+ // Only referenced functions get into the Personalities list.
+ const std::vector<Function *>& Personalities = MMI->getPersonalities();
+ for (std::vector<Function *>::const_iterator I = Personalities.begin(),
+ E = Personalities.end(); I != E; ++I) {
+ if (!*I)
+ continue;
+ if (!InStubSection) {
+ SwitchToDataSection(
+ "\t.section __IMPORT,__pointers,non_lazy_symbol_pointers");
+ InStubSection = true;
+ }
+ printGVStub((*I)->getNameStart(), "_");
+ }
+ }
+
+ // Output stubs for external and common global variables.
+ if (!InStubSection && !GVStubs.empty())
+ SwitchToDataSection(
+ "\t.section __IMPORT,__pointers,non_lazy_symbol_pointers");
+ for (StringSet<>::iterator i = GVStubs.begin(), e = GVStubs.end();
+ i != e; ++i)
+ printGVStub(i->getKeyData());
+
+ if (!HiddenGVStubs.empty()) {
+ SwitchToSection(TAI->getDataSection());
+ for (StringSet<>::iterator i = HiddenGVStubs.begin(), e = HiddenGVStubs.end();
+ i != e; ++i)
+ printHiddenGVStub(i->getKeyData());
+ }
+
+ // Emit final debug information.
+ DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+ DW->EndModule();
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ O << "\t.subsections_via_symbols\n";
+ } else if (Subtarget->isTargetCygMing()) {
+ // Emit type information for external functions
+ for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i) {
+ O << "\t.def\t " << i->getKeyData()
+ << ";\t.scl\t" << COFF::C_EXT
+ << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
+ << ";\t.endef\n";
+ }
+
+ // Emit final debug information.
+ DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+ DW->EndModule();
+ } else if (Subtarget->isTargetELF()) {
+ // Emit final debug information.
+ DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
+ DW->EndModule();
+ }
+
+ return AsmPrinter::doFinalization(M);
+}
+
+// Include the auto-generated portion of the assembly writer.
+#include "X86GenAsmWriter.inc"
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
new file mode 100644
index 000000000000..5b40e73bcb66
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
@@ -0,0 +1,164 @@
+//===-- X86ATTAsmPrinter.h - Convert X86 LLVM code to AT&T assembly -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AT&T assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ATTASMPRINTER_H
+#define X86ATTASMPRINTER_H
+
+#include "../X86.h"
+#include "../X86MachineFunctionInfo.h"
+#include "../X86TargetMachine.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MachineJumpTableInfo;
+
+class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
+ DwarfWriter *DW;
+ MachineModuleInfo *MMI;
+ const X86Subtarget *Subtarget;
+ public:
+ explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(0) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 AT&T-Style Assembly Printer";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ if (Subtarget->isTargetDarwin() ||
+ Subtarget->isTargetELF() ||
+ Subtarget->isTargetCygMing()) {
+ AU.addRequired<MachineModuleInfo>();
+ }
+ AU.addRequired<DwarfWriter>();
+ AsmPrinter::getAnalysisUsage(AU);
+ }
+
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ // These methods are used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier = 0, bool NotRIPRel = false);
+ void printi8mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf80mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printlea32mem(const MachineInstr *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64mem(const MachineInstr *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo, "subreg64");
+ }
+
+ bool printAsmMRegister(const MachineOperand &MO, const char Mode);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+ void printMachineInstruction(const MachineInstr *MI);
+ void printSSECC(const MachineInstr *MI, unsigned Op);
+ void printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL, bool NotRIPRel = false);
+ void printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL, bool NotRIPRel = false);
+ void printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const;
+ void printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const {
+ AsmPrinter::printPICJumpTableSetLabel(uid, uid2, MBB);
+ }
+ void printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned uid) const;
+
+ void printPICLabel(const MachineInstr *MI, unsigned Op);
+ void printModuleLevelGV(const GlobalVariable* GVar);
+
+ void printGVStub(const char *GV, const char *Prefix = NULL);
+ void printHiddenGVStub(const char *GV, const char *Prefix = NULL);
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void emitFunctionHeader(const MachineFunction &MF);
+
+ // Necessary for Darwin to print out the appropriate types of linker stubs
+ StringSet<> FnStubs, GVStubs, HiddenGVStubs;
+
+ // Necessary for dllexport support
+ StringSet<> DLLExportedFns, DLLExportedGVs;
+
+ // We have to propagate some information about the MachineFunction to the
+ // AsmPrinter. That is fine while we are printing the function itself, since
+ // we have access to the MachineFunction and can get the appropriate
+ // MachineFunctionInfo. Unfortunately, this is not possible when we are
+ // printing a reference to a Function (e.g. when calling it), and there is no
+ // way to get the corresponding MachineFunction: it may not even have been
+ // created yet. That is why we collect the necessary information in this
+ // additional structure.
+ //
+ // It is used, for example, for name decoration of stdcall and fastcall
+ // functions, since the decoration depends on the arguments' size.
+ typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap;
+ FMFInfoMap FunctionInfoMap;
+
+ void decorateName(std::string& Name, const GlobalValue* GV);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
new file mode 100644
index 000000000000..c874849dc18e
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -0,0 +1,50 @@
+//===-- X86AsmPrinter.cpp - Convert X86 LLVM IR to X86 assembly -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the shared superclass printer that converts our internal
+// representation of machine-dependent LLVM code to Intel- and AT&T-format
+// assembly language.
+// This printer is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ATTAsmPrinter.h"
+#include "X86IntelAsmPrinter.h"
+#include "X86Subtarget.h"
+using namespace llvm;
+
+/// createX86CodePrinterPass - Returns a pass that prints the X86 assembly code
+/// for a MachineFunction to the given output stream, using the given target
+/// machine description.
+///
+FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o,
+ X86TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>();
+
+ if (Subtarget->isFlavorIntel()) {
+ return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(),
+ OptLevel, verbose);
+ } else {
+ return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(),
+ OptLevel, verbose);
+ }
+}
+
+namespace {
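+ // Static constructor: runs at load time and registers the printer factory
+ // with the X86 target machine.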
+ static struct Register {
+ Register() {
+ X86TargetMachine::registerAsmPrinter(createX86CodePrinterPass);
+ }
+ } Registrator;
+}
+
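+// Dummy symbol: referencing X86AsmPrinterForceLink from another object file
+// forces this library to be linked in.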
+extern "C" int X86AsmPrinterForceLink;
+int X86AsmPrinterForceLink = 0;
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
new file mode 100644
index 000000000000..659934930d46
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
@@ -0,0 +1,609 @@
+//===-- X86IntelAsmPrinter.cpp - Convert X86 LLVM code to Intel assembly --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Intel format assembly language.
+// This printer is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "X86IntelAsmPrinter.h"
+#include "X86InstrInfo.h"
+#include "X86TargetAsmInfo.h"
+#include "X86.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
+ const TargetData *TD) {
+ X86MachineFunctionInfo Info;
+ uint64_t Size = 0;
+
+ switch (F->getCallingConv()) {
+ case CallingConv::X86_StdCall:
+ Info.setDecorationStyle(StdCall);
+ break;
+ case CallingConv::X86_FastCall:
+ Info.setDecorationStyle(FastCall);
+ break;
+ default:
+ return Info;
+ }
+
+ unsigned argNum = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI, ++argNum) {
+ const Type* Ty = AI->getType();
+
+ // 'Dereference' type in case of byval parameter attribute
+ if (F->paramHasAttr(argNum, Attribute::ByVal))
+ Ty = cast<PointerType>(Ty)->getElementType();
+
+ // Size should be aligned to DWORD boundary
+ Size += ((TD->getTypeAllocSize(Ty) + 3)/4)*4;
+ }
+
+ // We're not supporting tooooo huge arguments :)
+ Info.setBytesToPopOnReturn((unsigned int)Size);
+ return Info;
+}
+
+
+/// decorateName - Query FunctionInfoMap and use this information for various
+/// name decoration.
+void X86IntelAsmPrinter::decorateName(std::string &Name,
+ const GlobalValue *GV) {
+ const Function *F = dyn_cast<Function>(GV);
+ if (!F) return;
+
+ // We don't want to decorate non-stdcall or non-fastcall functions right now
+ unsigned CC = F->getCallingConv();
+ if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall)
+ return;
+
+ FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F);
+
+ const X86MachineFunctionInfo *Info;
+ if (info_item == FunctionInfoMap.end()) {
+ // Calculate the appropriate function info and populate the map
+ FunctionInfoMap[F] = calculateFunctionInfo(F, TM.getTargetData());
+ Info = &FunctionInfoMap[F];
+ } else {
+ Info = &info_item->second;
+ }
+
+ const FunctionType *FT = F->getFunctionType();
+ switch (Info->getDecorationStyle()) {
+ case None:
+ break;
+ case StdCall:
+ // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+ Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+ break;
+ case FastCall:
+ // "Pure" variadic functions do not receive @0 suffix.
+ if (!FT->isVarArg() || (FT->getNumParams() == 0) ||
+ (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+ Name += '@' + utostr_32(Info->getBytesToPopOnReturn());
+
+ if (Name[0] == '_')
+ Name[0] = '@';
+ else
+ Name = '@' + Name;
+
+ break;
+ default:
+ assert(0 && "Unsupported DecorationStyle");
+ }
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+ SetupMachineFunction(MF);
+ O << "\n\n";
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out labels for the function.
+ const Function *F = MF.getFunction();
+ unsigned CC = F->getCallingConv();
+
+ // Populate the function information map. We only want to record
+ // stdcall and fastcall functions' information right now.
+ if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall)
+ FunctionInfoMap[F] = *MF.getInfo<X86MachineFunctionInfo>();
+
+ decorateName(CurrentFnName, F);
+
+ SwitchToTextSection("_text", F);
+
+ unsigned FnAlign = 4;
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ FnAlign = 1;
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unsupported linkage type!");
+ case Function::PrivateLinkage:
+ case Function::InternalLinkage:
+ EmitAlignment(FnAlign);
+ break;
+ case Function::DLLExportLinkage:
+ DLLExportedFns.insert(CurrentFnName);
+ //FALLS THROUGH
+ case Function::ExternalLinkage:
+ O << "\tpublic " << CurrentFnName << "\n";
+ EmitAlignment(FnAlign);
+ break;
+ }
+
+ O << CurrentFnName << "\tproc near\n";
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ // Print a label for the basic block if there are any predecessors.
+ if (!I->pred_empty()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ printMachineInstruction(II);
+ }
+ }
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ O << CurrentFnName << "\tendp\n";
+
+ O.flush();
+
+ // We didn't modify anything.
+ return false;
+}
+
+void X86IntelAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
+ unsigned char value = MI->getOperand(Op).getImm();
+ assert(value <= 7 && "Invalid ssecc argument!");
+ switch (value) {
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
+ const char *Modifier) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ MVT VT = (strcmp(Modifier,"subreg64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier, "subreg32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier,"subreg16") == 0) ? MVT::i16 :MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+ O << TRI->getName(Reg);
+ } else
+ O << "reg" << MO.getReg();
+ return;
+ }
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+ case MachineOperand::MO_JumpTableIndex: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ if (!isMemOp) O << "OFFSET ";
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << "_" << MO.getIndex();
+ return;
+ }
+ case MachineOperand::MO_ConstantPoolIndex: {
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ if (!isMemOp) O << "OFFSET ";
+ O << "[" << TAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ printOffset(MO.getOffset());
+ O << "]";
+ return;
+ }
+ case MachineOperand::MO_GlobalAddress: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+
+ decorateName(Name, GV);
+
+ if (!isMemOp && !isCallOp) O << "OFFSET ";
+ if (GV->hasDLLImportLinkage()) {
+ // FIXME: This should be fixed with full support of stdcall & fastcall
+ // CC's
+ O << "__imp_";
+ }
+ O << Name;
+ printOffset(MO.getOffset());
+ return;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ if (!isCallOp) O << "OFFSET ";
+ O << TAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ }
+ default:
+ O << "<unknown operand type>"; return;
+ }
+}
+
+void X86IntelAsmPrinter::printLeaMemReference(const MachineInstr *MI,
+ unsigned Op,
+ const char *Modifier) {
+ const MachineOperand &BaseReg = MI->getOperand(Op);
+ int ScaleVal = MI->getOperand(Op+1).getImm();
+ const MachineOperand &IndexReg = MI->getOperand(Op+2);
+ const MachineOperand &DispSpec = MI->getOperand(Op+3);
+
+ O << "[";
+ bool NeedPlus = false;
+ if (BaseReg.getReg()) {
+ printOp(BaseReg, Modifier);
+ NeedPlus = true;
+ }
+
+ if (IndexReg.getReg()) {
+ if (NeedPlus) O << " + ";
+ if (ScaleVal != 1)
+ O << ScaleVal << "*";
+ printOp(IndexReg, Modifier);
+ NeedPlus = true;
+ }
+
+ if (DispSpec.isGlobal() || DispSpec.isCPI() ||
+ DispSpec.isJTI()) {
+ if (NeedPlus)
+ O << " + ";
+ printOp(DispSpec, "mem");
+ } else {
+ int DispVal = DispSpec.getImm();
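+ // Print the displacement, folding its sign into the preceding " + "/" - "
+ // when a base or index register has already been printed.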
+ if (DispVal || (!BaseReg.getReg() && !IndexReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
+ }
+ O << DispVal;
+ }
+ }
+ O << "]";
+}
+
+void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier) {
+ assert(isMem(MI, Op) && "Invalid memory reference!");
+ MachineOperand Segment = MI->getOperand(Op+4);
+ if (Segment.getReg()) {
+ printOperand(MI, Op+4, Modifier);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op, Modifier);
+}
+
+void X86IntelAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const {
+ if (!TAI->getSetDirective())
+ return;
+
+ O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix()
+ << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',';
+ printBasicBlockLabel(MBB, false, false, false);
+ O << '-' << "\"L" << getFunctionNumber() << "$pb\"'\n";
+}
+
+void X86IntelAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
+ O << "\"L" << getFunctionNumber() << "$pb\"\n";
+ O << "\"L" << getFunctionNumber() << "$pb\":";
+}
+
+bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO,
+ const char Mode) {
+ unsigned Reg = MO.getReg();
+ switch (Mode) {
+ default: return true; // Unknown mode.
+ case 'b': // Print QImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ break;
+ case 'h': // Print QImode high register
+ Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ break;
+ case 'w': // Print HImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ break;
+ case 'k': // Print SImode register
+ Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ break;
+ }
+
+ O << '%' << TRI->getName(Reg);
+ return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool X86IntelAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'b': // Print QImode register
+ case 'h': // Print QImode high register
+ case 'w': // Print HImode register
+ case 'k': // Print SImode register
+ return printAsmMRegister(MI->getOperand(OpNo), ExtraCode[0]);
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+bool X86IntelAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ printMemReference(MI, OpNo);
+ return false;
+}
+
+/// printMachineInstruction -- Print out a single X86 LLVM instruction
+/// MI in Intel syntax to the current output stream.
+///
+void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+}
+
+bool X86IntelAsmPrinter::doInitialization(Module &M) {
+ bool Result = AsmPrinter::doInitialization(M);
+
+ Mang->markCharUnacceptable('.');
+
+ O << "\t.686\n\t.model flat\n\n";
+
+ // Emit declarations for external functions.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->isDeclaration()) {
+ std::string Name = Mang->getValueName(I);
+ decorateName(Name, I);
+
+ O << "\textern " ;
+ if (I->hasDLLImportLinkage()) {
+ O << "__imp_";
+ }
+ O << Name << ":near\n";
+ }
+
+ // Emit declarations for external globals. Note that VC++ always declares
+ // external globals to have type byte, and if that's good enough for VC++...
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (I->isDeclaration()) {
+ std::string Name = Mang->getValueName(I);
+
+ O << "\textern " ;
+ if (I->hasDLLImportLinkage()) {
+ O << "__imp_";
+ }
+ O << Name << ":byte\n";
+ }
+ }
+
+ return Result;
+}
+
+bool X86IntelAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ // Print out module-level global variables here.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ if (I->isDeclaration()) continue; // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(I))
+ continue;
+
+ std::string name = Mang->getValueName(I);
+ Constant *C = I->getInitializer();
+ unsigned Align = TD->getPreferredAlignmentLog(I);
+ bool bCustomSegment = false;
+
+ switch (I->getLinkage()) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ SwitchToDataSection("");
+ O << name << "?\tsegment common 'COMMON'\n";
+ bCustomSegment = true;
+ // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
+ // are also available.
+ break;
+ case GlobalValue::AppendingLinkage:
+ SwitchToDataSection("");
+ O << name << "?\tsegment public 'DATA'\n";
+ bCustomSegment = true;
+ // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
+ // are also available.
+ break;
+ case GlobalValue::DLLExportLinkage:
+ DLLExportedGVs.insert(name);
+ // FALL THROUGH
+ case GlobalValue::ExternalLinkage:
+ O << "\tpublic " << name << "\n";
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ SwitchToSection(TAI->getDataSection());
+ break;
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ if (!bCustomSegment)
+ EmitAlignment(Align, I);
+
+ O << name << ":";
+ if (VerboseAsm)
+ O << "\t\t\t\t" << TAI->getCommentString()
+ << " " << I->getName();
+ O << '\n';
+
+ EmitGlobalConstant(C);
+
+ if (bCustomSegment)
+ O << name << "?\tends\n";
+ }
+
+ // Output linker support code for dllexported globals
+ if (!DLLExportedGVs.empty() || !DLLExportedFns.empty()) {
+ SwitchToDataSection("");
+ O << "; WARNING: The following code is valid only with MASM v8.x "
+ << "and (possibly) higher\n"
+ << "; This version of MASM is usually shipped with Microsoft "
+ << "Visual Studio 2005\n"
+ << "; or (possibly) later versions. Unfortunately, there is no "
+ << "way to support\n"
+ << "; dllexported symbols in earlier versions of MASM in a fully "
+ << "automatic way\n\n";
+ O << "_drectve\t segment info alias('.drectve')\n";
+ }
+
+ for (StringSet<>::iterator i = DLLExportedGVs.begin(),
+ e = DLLExportedGVs.end();
+ i != e; ++i)
+ O << "\t db ' /EXPORT:" << i->getKeyData() << ",data'\n";
+
+ for (StringSet<>::iterator i = DLLExportedFns.begin(),
+ e = DLLExportedFns.end();
+ i != e; ++i)
+ O << "\t db ' /EXPORT:" << i->getKeyData() << "'\n";
+
+ if (!DLLExportedGVs.empty() || !DLLExportedFns.empty())
+ O << "_drectve\t ends\n";
+
+ // Bypass X86SharedAsmPrinter::doFinalization().
+ bool Result = AsmPrinter::doFinalization(M);
+ SwitchToDataSection("");
+ O << "\tend\n";
+ return Result;
+}
+
+void X86IntelAsmPrinter::EmitString(const ConstantArray *CVA) const {
+ unsigned NumElts = CVA->getNumOperands();
+ if (NumElts) {
+ // ML does not have escape sequences except '' for '. It also has a maximum
+ // string length of 255.
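+ // For example, the bytes {'h', 'i', 0, '\'', 10} are emitted as:
+ //   db 'hi',0,'''',10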
+ unsigned len = 0;
+ bool inString = false;
+ for (unsigned i = 0; i < NumElts; i++) {
+ int n = cast<ConstantInt>(CVA->getOperand(i))->getZExtValue() & 255;
+ if (len == 0)
+ O << "\tdb ";
+
+ if (n >= 32 && n <= 127) {
+ if (!inString) {
+ if (len > 0) {
+ O << ",'";
+ len += 2;
+ } else {
+ O << "'";
+ len++;
+ }
+ inString = true;
+ }
+ if (n == '\'') {
+ O << "'";
+ len++;
+ }
+ O << char(n);
+ } else {
+ if (inString) {
+ O << "'";
+ len++;
+ inString = false;
+ }
+ if (len > 0) {
+ O << ",";
+ len++;
+ }
+ O << n;
+ len += 1 + (n > 9) + (n > 99);
+ }
+
+ if (len > 60) {
+ if (inString) {
+ O << "'";
+ inString = false;
+ }
+ O << "\n";
+ len = 0;
+ }
+ }
+
+ if (len > 0) {
+ if (inString)
+ O << "'";
+ O << "\n";
+ }
+ }
+}
+
+// Include the auto-generated portion of the assembly writer.
+#include "X86GenAsmWriter1.inc"
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
new file mode 100644
index 000000000000..9520d982f692
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
@@ -0,0 +1,152 @@
+//===-- X86IntelAsmPrinter.h - Convert X86 LLVM code to Intel assembly ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Intel assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INTELASMPRINTER_H
+#define X86INTELASMPRINTER_H
+
+#include "../X86.h"
+#include "../X86MachineFunctionInfo.h"
+#include "../X86TargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
+ explicit X86IntelAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V) {}
+
+ virtual const char *getPassName() const {
+ return "X86 Intel-Style Assembly Printer";
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ bool printInstruction(const MachineInstr *MI);
+
+ // This method is used by the tablegen'erated instruction printer.
+ void printOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *Modifier = 0) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isReg()) {
+ assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+ "Not physreg??");
+ O << TM.getRegisterInfo()->get(MO.getReg()).Name; // Capitalized names
+ } else {
+ printOp(MO, Modifier);
+ }
+ }
+
+ void printi8mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "BYTE PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "WORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf80mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "XWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "XMMWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
+ void printlea32mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "DWORD PTR ";
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "QWORD PTR ";
+ printLeaMemReference(MI, OpNo, "subreg64");
+ }
+
+ bool printAsmMRegister(const MachineOperand &MO, const char Mode);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO, const char *Modifier = 0);
+ void printSSECC(const MachineInstr *MI, unsigned Op);
+ void printMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL);
+ void printLeaMemReference(const MachineInstr *MI, unsigned Op,
+ const char *Modifier=NULL);
+ void printPICJumpTableSetLabel(unsigned uid,
+ const MachineBasicBlock *MBB) const;
+ void printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const {
+ AsmPrinter::printPICJumpTableSetLabel(uid, uid2, MBB);
+ }
+ void printPICLabel(const MachineInstr *MI, unsigned Op);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ // We have to propagate some information about MachineFunction to
+ // AsmPrinter. That's fine while we're printing the function itself, since we
+ // have access to the MachineFunction and can get the appropriate
+ // MachineFunctionInfo. Unfortunately, this is not possible when we're
+ // printing a reference to a Function (e.g. when calling it). Worse, there is
+ // no way to get the corresponding MachineFunction: it may not have been
+ // created at all. That's why we use an additional structure to collect all
+ // the necessary information.
+ //
+ // This structure is used e.g. for name decoration of stdcall & fastcall
+ // functions, since we have to use the arguments' size for the decoration.
+ typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap;
+ FMFInfoMap FunctionInfoMap;
+
+ void decorateName(std::string& Name, const GlobalValue* GV);
+
+ virtual void EmitString(const ConstantArray *CVA) const;
+
+ // Necessary for dllexport support
+ StringSet<> DLLExportedFns, DLLExportedGVs;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
new file mode 100644
index 000000000000..d98299049a2d
--- /dev/null
+++ b/lib/Target/X86/CMakeLists.txt
@@ -0,0 +1,29 @@
+set(LLVM_TARGET_DEFINITIONS X86.td)
+
+tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(X86GenRegisterNames.inc -gen-register-enums)
+tablegen(X86GenRegisterInfo.inc -gen-register-desc)
+tablegen(X86GenInstrNames.inc -gen-instr-enums)
+tablegen(X86GenInstrInfo.inc -gen-instr-desc)
+tablegen(X86GenAsmWriter.inc -gen-asm-writer)
+tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
+tablegen(X86GenDAGISel.inc -gen-dag-isel)
+tablegen(X86GenFastISel.inc -gen-fast-isel)
+tablegen(X86GenCallingConv.inc -gen-callingconv)
+tablegen(X86GenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(X86CodeGen
+ X86CodeEmitter.cpp
+ X86ELFWriterInfo.cpp
+ X86FloatingPoint.cpp
+ X86FloatingPointRegKill.cpp
+ X86ISelDAGToDAG.cpp
+ X86ISelLowering.cpp
+ X86InstrInfo.cpp
+ X86JITInfo.cpp
+ X86RegisterInfo.cpp
+ X86Subtarget.cpp
+ X86TargetAsmInfo.cpp
+ X86TargetMachine.cpp
+ X86FastISel.cpp
+ )
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
new file mode 100644
index 000000000000..44f1c5d5a509
--- /dev/null
+++ b/lib/Target/X86/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/X86/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMX86CodeGen
+TARGET = X86
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
+ X86GenRegisterInfo.inc X86GenInstrNames.inc \
+ X86GenInstrInfo.inc X86GenAsmWriter.inc \
+ X86GenAsmWriter1.inc X86GenDAGISel.inc \
+ X86GenFastISel.inc \
+ X86GenCallingConv.inc X86GenSubtarget.inc
+
+DIRS = AsmPrinter
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/README-FPStack.txt b/lib/Target/X86/README-FPStack.txt
new file mode 100644
index 000000000000..be28e8b394a4
--- /dev/null
+++ b/lib/Target/X86/README-FPStack.txt
@@ -0,0 +1,85 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend: FP stack related stuff
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+
+Some targets (e.g. Athlons) prefer ffreep to fstp ST(0):
+http://gcc.gnu.org/ml/gcc-patches/2004-04/msg00659.html
+
+//===---------------------------------------------------------------------===//
+
+This should use fiadd on chips where it is profitable:
+double foo(double P, int *I) { return P+*I; }
+
+We have fiadd patterns now, but the following two have the same cost and
+complexity. We need a way to specify that the latter is more profitable.
+
+def FpADD32m : FpI<(ops RFP:$dst, RFP:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP:$dst, (fadd RFP:$src1,
+ (extloadf64f32 addr:$src2)))]>;
+ // ST(0) = ST(0) + [mem32]
+
+def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP:$dst, (fadd RFP:$src1,
+ (X86fild addr:$src2, i32)))]>;
+ // ST(0) = ST(0) + [mem32int]
+
+//===---------------------------------------------------------------------===//
+
+The FP stackifier needs to be global. Also, it should handle simple
+permutations to reduce the number of shuffle instructions, e.g. turning:
+
+fld P -> fld Q
+fld Q fld P
+fxch
+
+or:
+
+fxch -> fucomi
+fucomi jl X
+jg X
+
+Ideas:
+http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02410.html
+
+
+//===---------------------------------------------------------------------===//
+
+Add a target specific hook to DAG combiner to handle SINT_TO_FP and
+FP_TO_SINT when the source operand is already in memory.
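+
+A sketch of the kind of code this targets (hypothetical example, not an actual
+testcase):
+
+double f(int *I) { return (double)*I; }  // SINT_TO_FP of a value loaded from
+                                         // memory; could fold into one fildl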
+
+//===---------------------------------------------------------------------===//
+
+Open code rint,floor,ceil,trunc:
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02006.html
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02011.html
+
+Opencode the sincos[f] libcall.
+
+//===---------------------------------------------------------------------===//
+
+None of the FPStack instructions are handled in
+X86RegisterInfo::foldMemoryOperand, which prevents the spiller from
+folding spill code into the instructions.
+
+//===---------------------------------------------------------------------===//
+
+Currently the x86 codegen isn't very good at mixing SSE and FPStack
+code:
+
+unsigned int foo(double x) { return x; }
+
+foo:
+ subl $20, %esp
+ movsd 24(%esp), %xmm0
+ movsd %xmm0, 8(%esp)
+ fldl 8(%esp)
+ fisttpll (%esp)
+ movl (%esp), %eax
+ addl $20, %esp
+ ret
+
+This just requires being smarter when custom expanding fptoui.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README-MMX.txt b/lib/Target/X86/README-MMX.txt
new file mode 100644
index 000000000000..a6c8616b6d2c
--- /dev/null
+++ b/lib/Target/X86/README-MMX.txt
@@ -0,0 +1,71 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend: MMX-specific stuff.
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+
+This:
+
+#include <mmintrin.h>
+
+__v2si qux(int A) {
+ return (__v2si){ 0, A };
+}
+
+is compiled into:
+
+_qux:
+ subl $28, %esp
+ movl 32(%esp), %eax
+ movd %eax, %mm0
+ movq %mm0, (%esp)
+ movl (%esp), %eax
+ movl %eax, 20(%esp)
+ movq %mm0, 8(%esp)
+ movl 12(%esp), %eax
+ movl %eax, 16(%esp)
+ movq 16(%esp), %mm0
+ addl $28, %esp
+ ret
+
+Yuck!
+
+GCC gives us:
+
+_qux:
+ subl $12, %esp
+ movl 16(%esp), %eax
+ movl 20(%esp), %edx
+ movl $0, (%eax)
+ movl %edx, 4(%eax)
+ addl $12, %esp
+ ret $4
+
+//===---------------------------------------------------------------------===//
+
+We generate crappy code for this:
+
+__m64 t() {
+ return _mm_cvtsi32_si64(1);
+}
+
+_t:
+ subl $12, %esp
+ movl $1, %eax
+ movd %eax, %mm0
+ movq %mm0, (%esp)
+ movl (%esp), %eax
+ movl 4(%esp), %edx
+ addl $12, %esp
+ ret
+
+The extra stack traffic is covered in the previous entry. The other problem is
+that we are not smart about materializing constants in MMX registers. With -m64
+we generate:
+
+ movl $1, %eax
+ movd %eax, %mm0
+ movd %mm0, %rax
+ ret
+
+We should be using a constantpool load instead:
+ movq LC0(%rip), %rax
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
new file mode 100644
index 000000000000..71ad51c7984e
--- /dev/null
+++ b/lib/Target/X86/README-SSE.txt
@@ -0,0 +1,918 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend: SSE-specific stuff.
+//===---------------------------------------------------------------------===//
+
+- Consider eliminating the unaligned SSE load intrinsics, replacing them with
+ unaligned LLVM load instructions.
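+  For instance (illustrative only), a wrapper like
+
+    #include <xmmintrin.h>
+    __m128 load4(const float *p) { return _mm_loadu_ps(p); }
+
+  could then be expressed as an ordinary 16-byte load with alignment 1, which
+  the optimizers can reason about like any other load.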
+
+//===---------------------------------------------------------------------===//
+
+Expand libm rounding functions inline: Significant speedups possible.
+http://gcc.gnu.org/ml/gcc-patches/2006-10/msg00909.html
+
+//===---------------------------------------------------------------------===//
+
+When compiled with unsafemath enabled, "main" should enable SSE DAZ mode and
+other fast SSE modes.
+
+//===---------------------------------------------------------------------===//
+
+Think about doing i64 math in SSE regs on x86-32.
+
+//===---------------------------------------------------------------------===//
+
+This testcase should have no SSE instructions in it, and only one load from
+a constant pool:
+
+double %test3(bool %B) {
+ %C = select bool %B, double 123.412, double 523.01123123
+ ret double %C
+}
+
+Currently, the select is being lowered, which prevents the dag combiner from
+turning 'select (load CPI1), (load CPI2)' -> 'load (select CPI1, CPI2)'
+
+The pattern isel got this one right.
+
+//===---------------------------------------------------------------------===//
+
+SSE doesn't have [mem] op= reg instructions. If we have an SSE instruction
+like this:
+
+ X += y
+
+and the register allocator decides to spill X, it is cheaper to emit this as:
+
+Y += [xslot]
+store Y -> [xslot]
+
+than as:
+
+tmp = [xslot]
+tmp += y
+store tmp -> [xslot]
+
+..and this uses one fewer register (so this should be done at load folding
+time, not at spiller time). *Note* however that this can only be done
+if Y is dead. Here's a testcase:
+
+@.str_3 = external global [15 x i8]
+declare void @printf(i32, ...)
+define void @main() {
+build_tree.exit:
+ br label %no_exit.i7
+
+no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit
+ %tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ],
+ [ %tmp.34.i18, %no_exit.i7 ]
+ %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ],
+ [ %tmp.28.i16, %no_exit.i7 ]
+ %tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00
+ %tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00
+ br i1 false, label %Compute_Tree.exit23, label %no_exit.i7
+
+Compute_Tree.exit23: ; preds = %no_exit.i7
+ tail call void (i32, ...)* @printf( i32 0 )
+ store double %tmp.34.i18, double* null
+ ret void
+}
+
+We currently emit:
+
+.BBmain_1:
+ xorpd %XMM1, %XMM1
+ addsd %XMM0, %XMM1
+*** movsd %XMM2, QWORD PTR [%ESP + 8]
+*** addsd %XMM2, %XMM1
+*** movsd QWORD PTR [%ESP + 8], %XMM2
+ jmp .BBmain_1 # no_exit.i7
+
+This is a bugpoint-reduced testcase, which is why it doesn't make much sense
+(e.g. it's an infinite loop). :)
+
+//===---------------------------------------------------------------------===//
+
+SSE should implement 'select_cc' using 'emulated conditional moves' that use
+pcmp/pand/pandn/por to do a selection instead of a conditional branch:
+
+double %X(double %Y, double %Z, double %A, double %B) {
+ %C = setlt double %A, %B
+ %z = add double %Z, 0.0 ;; select operand is not a load
+ %D = select bool %C, double %Y, double %z
+ ret double %D
+}
+
+We currently emit:
+
+_X:
+ subl $12, %esp
+ xorpd %xmm0, %xmm0
+ addsd 24(%esp), %xmm0
+ movsd 32(%esp), %xmm1
+ movsd 16(%esp), %xmm2
+ ucomisd 40(%esp), %xmm1
+ jb LBB_X_2
+LBB_X_1:
+ movsd %xmm0, %xmm2
+LBB_X_2:
+ movsd %xmm2, (%esp)
+ fldl (%esp)
+ addl $12, %esp
+ ret
+
+//===---------------------------------------------------------------------===//
+
+It's not clear whether we should use pxor or xorps / xorpd to clear XMM
+registers. The choice may depend on subtarget information. We should do some
+more experiments on different x86 machines.
+
+//===---------------------------------------------------------------------===//
+
+Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
+feasible.
+
+//===---------------------------------------------------------------------===//
+
+Codegen:
+ if (copysign(1.0, x) == copysign(1.0, y))
+into:
+ if (x^y & mask)
+when using SSE.
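+
+A rough C-level sketch of the bit trick (illustrative; assumes IEEE doubles):
+
+#include <string.h>
+int same_sign(double x, double y) {
+  unsigned long long xb, yb;
+  memcpy(&xb, &x, 8);
+  memcpy(&yb, &y, 8);
+  // The signs agree exactly when the xor of the two sign bits is zero.
+  return ((xb ^ yb) & 0x8000000000000000ULL) == 0;
+}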
+
+//===---------------------------------------------------------------------===//
+
+Use movhps to update upper 64-bits of a v4sf value. Also movlps on lower half
+of a v4sf value.
+
+//===---------------------------------------------------------------------===//
+
+Better codegen for vector_shuffles like this { x, 0, 0, 0 } or { x, 0, x, 0 }.
+Perhaps use pxor / xorp* to clear an XMM register first?
+
+//===---------------------------------------------------------------------===//
+
+How to decide when to use the "floating point version" of logical ops? Here are
+some code fragments:
+
+ movaps LCPI5_5, %xmm2
+ divps %xmm1, %xmm2
+ mulps %xmm2, %xmm3
+ mulps 8656(%ecx), %xmm3
+ addps 8672(%ecx), %xmm3
+ andps LCPI5_6, %xmm2
+ andps LCPI5_1, %xmm3
+ por %xmm2, %xmm3
+ movdqa %xmm3, (%edi)
+
+ movaps LCPI5_5, %xmm1
+ divps %xmm0, %xmm1
+ mulps %xmm1, %xmm3
+ mulps 8656(%ecx), %xmm3
+ addps 8672(%ecx), %xmm3
+ andps LCPI5_6, %xmm1
+ andps LCPI5_1, %xmm3
+ orps %xmm1, %xmm3
+ movaps %xmm3, 112(%esp)
+ movaps %xmm3, (%ebx)
+
+Due to some minor source change, the latter case ended up using orps and movaps
+instead of por and movdqa. Does it matter?
+
+//===---------------------------------------------------------------------===//
+
+X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible
+to choose between movaps, movapd, and movdqa based on types of source and
+destination?
+
+How about andps, andpd, and pand? Do we really care about the type of the packed
+elements? If not, why not always use the "ps" variants, which are likely to be
+shorter?
+
+//===---------------------------------------------------------------------===//
+
+External test Nurbs exposed some problems. Look for
+__ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc
+emits:
+
+ movaps (%edx), %xmm2 #59.21
+ movaps (%edx), %xmm5 #60.21
+ movaps (%edx), %xmm4 #61.21
+ movaps (%edx), %xmm3 #62.21
+ movl 40(%ecx), %ebp #69.49
+ shufps $0, %xmm2, %xmm5 #60.21
+ movl 100(%esp), %ebx #69.20
+ movl (%ebx), %edi #69.20
+ imull %ebp, %edi #69.49
+ addl (%eax), %edi #70.33
+ shufps $85, %xmm2, %xmm4 #61.21
+ shufps $170, %xmm2, %xmm3 #62.21
+ shufps $255, %xmm2, %xmm2 #63.21
+ lea (%ebp,%ebp,2), %ebx #69.49
+ negl %ebx #69.49
+ lea -3(%edi,%ebx), %ebx #70.33
+ shll $4, %ebx #68.37
+ addl 32(%ecx), %ebx #68.37
+ testb $15, %bl #91.13
+ jne L_B1.24 # Prob 5% #91.13
+
+This is the llvm code after instruction scheduling:
+
+cond_next140 (0xa910740, LLVM BB @0xa90beb0):
+ %reg1078 = MOV32ri -3
+ %reg1079 = ADD32rm %reg1078, %reg1068, 1, %NOREG, 0
+ %reg1037 = MOV32rm %reg1024, 1, %NOREG, 40
+ %reg1080 = IMUL32rr %reg1079, %reg1037
+ %reg1081 = MOV32rm %reg1058, 1, %NOREG, 0
+ %reg1038 = LEA32r %reg1081, 1, %reg1080, -3
+ %reg1036 = MOV32rm %reg1024, 1, %NOREG, 32
+ %reg1082 = SHL32ri %reg1038, 4
+ %reg1039 = ADD32rr %reg1036, %reg1082
+ %reg1083 = MOVAPSrm %reg1059, 1, %NOREG, 0
+ %reg1034 = SHUFPSrr %reg1083, %reg1083, 170
+ %reg1032 = SHUFPSrr %reg1083, %reg1083, 0
+ %reg1035 = SHUFPSrr %reg1083, %reg1083, 255
+ %reg1033 = SHUFPSrr %reg1083, %reg1083, 85
+ %reg1040 = MOV32rr %reg1039
+ %reg1084 = AND32ri8 %reg1039, 15
+ CMP32ri8 %reg1084, 0
+ JE mbb<cond_next204,0xa914d30>
+
+Still ok. After register allocation:
+
+cond_next140 (0xa910740, LLVM BB @0xa90beb0):
+ %EAX = MOV32ri -3
+ %EDX = MOV32rm <fi#3>, 1, %NOREG, 0
+ ADD32rm %EAX<def&use>, %EDX, 1, %NOREG, 0
+ %EDX = MOV32rm <fi#7>, 1, %NOREG, 0
+ %EDX = MOV32rm %EDX, 1, %NOREG, 40
+ IMUL32rr %EAX<def&use>, %EDX
+ %ESI = MOV32rm <fi#5>, 1, %NOREG, 0
+ %ESI = MOV32rm %ESI, 1, %NOREG, 0
+ MOV32mr <fi#4>, 1, %NOREG, 0, %ESI
+ %EAX = LEA32r %ESI, 1, %EAX, -3
+ %ESI = MOV32rm <fi#7>, 1, %NOREG, 0
+ %ESI = MOV32rm %ESI, 1, %NOREG, 32
+ %EDI = MOV32rr %EAX
+ SHL32ri %EDI<def&use>, 4
+ ADD32rr %EDI<def&use>, %ESI
+ %XMM0 = MOVAPSrm %ECX, 1, %NOREG, 0
+ %XMM1 = MOVAPSrr %XMM0
+ SHUFPSrr %XMM1<def&use>, %XMM1, 170
+ %XMM2 = MOVAPSrr %XMM0
+ SHUFPSrr %XMM2<def&use>, %XMM2, 0
+ %XMM3 = MOVAPSrr %XMM0
+ SHUFPSrr %XMM3<def&use>, %XMM3, 255
+ SHUFPSrr %XMM0<def&use>, %XMM0, 85
+ %EBX = MOV32rr %EDI
+ AND32ri8 %EBX<def&use>, 15
+ CMP32ri8 %EBX, 0
+ JE mbb<cond_next204,0xa914d30>
+
+This looks really bad. The problem is that shufps is a destructive opcode:
+since the same value appears as operand two in more than one shufps op, it
+results in a number of copies. Note that icc also suffers from the same
+problem. Either the instruction selector should select pshufd, or the register
+allocator should make the two-address to three-address transformation.
+
+It also exposes some other problems. See MOV32ri -3 and the spills.
+
+//===---------------------------------------------------------------------===//
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25500
+
+LLVM is producing bad code.
+
+LBB_main_4: # cond_true44
+ addps %xmm1, %xmm2
+ subps %xmm3, %xmm2
+ movaps (%ecx), %xmm4
+ movaps %xmm2, %xmm1
+ addps %xmm4, %xmm1
+ addl $16, %ecx
+ incl %edx
+ cmpl $262144, %edx
+ movaps %xmm3, %xmm2
+ movaps %xmm4, %xmm3
+ jne LBB_main_4 # cond_true44
+
+There are two problems: 1) there is no need for two loop induction variables;
+we can compare against 262144 * 16. 2) A known register coalescer issue; we
+should be able to eliminate one of the movaps:
+
+ addps %xmm2, %xmm1 <=== Commute!
+ subps %xmm3, %xmm1
+ movaps (%ecx), %xmm4
+ movaps %xmm1, %xmm1 <=== Eliminate!
+ addps %xmm4, %xmm1
+ addl $16, %ecx
+ incl %edx
+ cmpl $262144, %edx
+ movaps %xmm3, %xmm2
+ movaps %xmm4, %xmm3
+ jne LBB_main_4 # cond_true44
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+
+__m128 test(float a) {
+ return _mm_set_ps(0.0, 0.0, 0.0, a*a);
+}
+
+This compiles into:
+
+movss 4(%esp), %xmm1
+mulss %xmm1, %xmm1
+xorps %xmm0, %xmm0
+movss %xmm1, %xmm0
+ret
+
+Because mulss doesn't modify the top 3 elements, the top elements of
+xmm1 are already zero'd. We could compile this to:
+
+movss 4(%esp), %xmm0
+mulss %xmm0, %xmm0
+ret
+
+//===---------------------------------------------------------------------===//
+
+Here's a sick and twisted idea. Consider code like this:
+
+__m128 test(__m128 a) {
+ float b = *(float*)&a;
+ ...
+ return _mm_set_ps(0.0, 0.0, 0.0, b);
+}
+
+This might compile to this code:
+
+movaps c(%esp), %xmm1
+xorps %xmm0, %xmm0
+movss %xmm1, %xmm0
+ret
+
+Now consider if the ... code caused xmm1 to get spilled. This might produce
+this code:
+
+movaps c(%esp), %xmm1
+movaps %xmm1, c2(%esp)
+...
+
+xorps %xmm0, %xmm0
+movaps c2(%esp), %xmm1
+movss %xmm1, %xmm0
+ret
+
+However, since the reload is only used by these instructions, we could
+"fold" it into the uses, producing something like this:
+
+movaps c(%esp), %xmm1
+movaps %xmm1, c2(%esp)
+...
+
+movss c2(%esp), %xmm0
+ret
+
+... saving two instructions.
+
+The basic idea is that a reload from a spill slot can, if only one 4-byte
+chunk is used, bring in 3 zeros plus the one element instead of 4 elements.
+This can be used to simplify a variety of shuffle operations, where the other
+elements are fixed zeros.
+
+//===---------------------------------------------------------------------===//
+
+__m128d test1( __m128d A, __m128d B) {
+ return _mm_shuffle_pd(A, B, 0x3);
+}
+
+compiles to
+
+shufpd $3, %xmm1, %xmm0
+
+Perhaps it's better to use unpckhpd instead?
+
+unpckhpd %xmm1, %xmm0
+
+Don't know if unpckhpd is faster. But it is shorter.
+
+//===---------------------------------------------------------------------===//
+
+This code generates ugly code, probably due to costs being off or something:
+
+define void @test(float* %P, <4 x float>* %P2 ) {
+ %xFloat0.688 = load float* %P
+ %tmp = load <4 x float>* %P2
+ %inFloat3.713 = insertelement <4 x float> %tmp, float 0.0, i32 3
+ store <4 x float> %inFloat3.713, <4 x float>* %P2
+ ret void
+}
+
+Generates:
+
+_test:
+ movl 8(%esp), %eax
+ movaps (%eax), %xmm0
+ pxor %xmm1, %xmm1
+ movaps %xmm0, %xmm2
+ shufps $50, %xmm1, %xmm2
+ shufps $132, %xmm2, %xmm0
+ movaps %xmm0, (%eax)
+ ret
+
+Would it be better to generate:
+
+_test:
+ movl 8(%esp), %ecx
+ movaps (%ecx), %xmm0
+ xor %eax, %eax
+ pinsrw $6, %eax, %xmm0
+ pinsrw $7, %eax, %xmm0
+ movaps %xmm0, (%ecx)
+ ret
+
+?
+
+//===---------------------------------------------------------------------===//
+
+Some useful information in the Apple Altivec / SSE Migration Guide:
+
+http://developer.apple.com/documentation/Performance/Conceptual/
+Accelerate_sse_migration/index.html
+
+e.g. SSE select using and, andnot, or. Various SSE compare translations.
+
+//===---------------------------------------------------------------------===//
+
+Add hooks to commute some CMPP operations.
+
+//===---------------------------------------------------------------------===//
+
+Apply the same transformation that merged four float into a single 128-bit load
+to loads from constant pool.
+
+//===---------------------------------------------------------------------===//
+
+Floating point max / min are commutable when -enable-unsafe-fp-path is
+specified. We should turn int_x86_sse_max_ss and X86ISD::FMIN etc. into other
+nodes which are selected to max / min instructions that are marked commutable.
+
+//===---------------------------------------------------------------------===//
+
+We should materialize vector constants like "all ones" and "signbit" with
+code like:
+
+ cmpeqps xmm1, xmm1 ; xmm1 = all-ones
+
+and:
+ cmpeqps xmm1, xmm1 ; xmm1 = all-ones
+ pslld xmm1, 31 ; xmm1 = all 100000000000...
+
+instead of using a load from the constant pool. The latter is important for
+ABS/NEG/copysign etc.
+
+//===---------------------------------------------------------------------===//
+
+These functions:
+
+#include <xmmintrin.h>
+__m128i a;
+void x(unsigned short n) {
+ a = _mm_slli_epi32 (a, n);
+}
+void y(unsigned n) {
+ a = _mm_slli_epi32 (a, n);
+}
+
+compile to ( -O3 -static -fomit-frame-pointer):
+_x:
+ movzwl 4(%esp), %eax
+ movd %eax, %xmm0
+ movaps _a, %xmm1
+ pslld %xmm0, %xmm1
+ movaps %xmm1, _a
+ ret
+_y:
+ movd 4(%esp), %xmm0
+ movaps _a, %xmm1
+ pslld %xmm0, %xmm1
+ movaps %xmm1, _a
+ ret
+
+"y" looks good, but "x" does a silly movzwl into a GPR first. It seems
+like movd would be sufficient in both cases, as the value is already
+zero-extended in the 32-bit stack slot IIRC. For signed short, it should also
+be safe, as a negative value would be undefined for pslld anyway.
+
+
+//===---------------------------------------------------------------------===//
+
+#include <math.h>
+int t1(double d) { return signbit(d); }
+
+This currently compiles to:
+ subl $12, %esp
+ movsd 16(%esp), %xmm0
+ movsd %xmm0, (%esp)
+ movl 4(%esp), %eax
+ shrl $31, %eax
+ addl $12, %esp
+ ret
+
+We should use movmskp{s|d} instead.
+
+//===---------------------------------------------------------------------===//
+
+CodeGen/X86/vec_align.ll tests whether we can turn 4 scalar loads into a single
+(aligned) vector load. This functionality has a couple of problems.
+
+1. The code to infer alignment from loads of globals is in the X86 backend,
+ not the dag combiner. This is because dagcombine2 needs to be able to see
+ through the X86ISD::Wrapper node, which DAGCombine can't really do.
+2. The code for turning 4 x load into a single vector load is target
+ independent and should be moved to the dag combiner.
+3. The code for turning 4 x load into a vector load can only handle a direct
+ load from a global or a direct load from the stack. It should be generalized
+ to handle any load from P, P+4, P+8, P+12, where P can be anything.
+4. The alignment inference code cannot handle loads from globals in non-static
+ mode because it doesn't look through the extra dyld stub load. If you try
+ vec_align.ll without -relocation-model=static, you'll see what I mean.
+
+//===---------------------------------------------------------------------===//
+
+We should lower store(fneg(load p), q) into an integer load+xor+store, which
+eliminates a constant pool load. For example, consider:
+
+define i64 @ccosf(float %z.0, float %z.1) nounwind readonly {
+entry:
+ %tmp6 = sub float -0.000000e+00, %z.1 ; <float> [#uses=1]
+ %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly
+ ret i64 %tmp20
+}
+
+This currently compiles to:
+
+LCPI1_0: # <4 x float>
+ .long 2147483648 # float -0
+ .long 2147483648 # float -0
+ .long 2147483648 # float -0
+ .long 2147483648 # float -0
+_ccosf:
+ subl $12, %esp
+ movss 16(%esp), %xmm0
+ movss %xmm0, 4(%esp)
+ movss 20(%esp), %xmm0
+ xorps LCPI1_0, %xmm0
+ movss %xmm0, (%esp)
+ call L_ccoshf$stub
+ addl $12, %esp
+ ret
+
+Note the load into xmm0, then xor (to negate), then store. In PIC mode,
+this code computes the pic base and does two loads to do the constant pool
+load, so the improvement is much bigger.
+
+The tricky part about this xform is that the argument load/store isn't exposed
+until post-legalize, and at that point, the fneg has been custom expanded into
+an X86 fxor. This means that we need to handle this case in the x86 backend
+instead of in target independent code.
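+
+Roughly, the desired form corresponds to this C-level sketch (illustrative
+only, not the actual lowering):
+
+#include <string.h>
+void store_fneg(float *p, float *q) {
+  unsigned bits;
+  memcpy(&bits, p, 4);
+  bits ^= 0x80000000u;  // flip the sign bit with an integer xor
+  memcpy(q, &bits, 4);  // no constant pool load, no XMM traffic
+}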
+
+//===---------------------------------------------------------------------===//
+
+Non-SSE4 insert into 16 x i8 is atrociously bad.
+
+//===---------------------------------------------------------------------===//
+
+<2 x i64> extract is substantially worse than <2 x f64>, even if the destination
+is memory.
+
+//===---------------------------------------------------------------------===//
+
+SSE4 extract-to-mem ops aren't being pattern matched because of the AssertZext
+sitting between the truncate and the extract.
+
+//===---------------------------------------------------------------------===//
+
+INSERTPS can match any insert (extract, imm1), imm2 for 4 x float, and insert
+any number of 0.0 simultaneously. Currently we only use it for simple
+insertions.
+
+See comments in LowerINSERT_VECTOR_ELT_SSE4.
+
+//===---------------------------------------------------------------------===//
+
+On a random note, SSE2 should declare insert/extract of 2 x f64 as legal, not
+Custom. All combinations of insert/extract reg-reg, reg-mem, and mem-reg are
+legal, it'll just take a few extra patterns written in the .td file.
+
+Note: this is not a code quality issue; the custom lowered code happens to be
+right, but we shouldn't have to custom lower anything. This is probably related
+to <2 x i64> ops being so bad.
+
+//===---------------------------------------------------------------------===//
+
+'select' on vectors and scalars could be a whole lot better. We currently
+lower them to conditional branches. On x86-64 for example, we compile this:
+
+double test(double a, double b, double c, double d) { return a<b ? c : d; }
+
+to:
+
+_test:
+ ucomisd %xmm0, %xmm1
+ ja LBB1_2 # entry
+LBB1_1: # entry
+ movapd %xmm3, %xmm2
+LBB1_2: # entry
+ movapd %xmm2, %xmm0
+ ret
+
+instead of:
+
+_test:
+ cmpltsd %xmm1, %xmm0
+ andpd %xmm0, %xmm2
+ andnpd %xmm3, %xmm0
+ orpd %xmm2, %xmm0
+ ret
+
+For unpredictable branches, the latter is much more efficient. This should
+just be a matter of having scalar sse map to SELECT_CC and custom expanding
+or iseling it.
+
+//===---------------------------------------------------------------------===//
+
+LLVM currently generates stack realignment code when it is not actually
+needed. The problem is that we need to know about stack alignment too early,
+before RA runs.
+
+At that point we don't know whether there will be vector spills or not.
+The stack realignment logic is overly conservative here, but otherwise we could
+produce unaligned loads/stores.
+
+Fixing this will require some huge RA changes.
+
+Testcase:
+#include <emmintrin.h>
+
+typedef short vSInt16 __attribute__ ((__vector_size__ (16)));
+
+static const vSInt16 a = {- 22725, - 12873, - 22725, - 12873, - 22725, - 12873,
+- 22725, - 12873};;
+
+vSInt16 madd(vSInt16 b)
+{
+ return _mm_madd_epi16(a, b);
+}
+
+Generated code (x86-32, linux):
+madd:
+ pushl %ebp
+ movl %esp, %ebp
+ andl $-16, %esp
+ movaps .LCPI1_0, %xmm1
+ pmaddwd %xmm1, %xmm0
+ movl %ebp, %esp
+ popl %ebp
+ ret
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+#include <emmintrin.h>
+__m128 foo2 (float x) {
+ return _mm_set_ps (0, 0, x, 0);
+}
+
+In x86-32 mode, we generate this spiffy code:
+
+_foo2:
+ movss 4(%esp), %xmm0
+ pshufd $81, %xmm0, %xmm0
+ ret
+
+in x86-64 mode, we generate this code, which could be better:
+
+_foo2:
+ xorps %xmm1, %xmm1
+ movss %xmm0, %xmm1
+ pshufd $81, %xmm1, %xmm0
+ ret
+
+In sse4 mode, we could use insertps to make both better.
+
+Here's another testcase that could use insertps [mem]:
+
+#include <xmmintrin.h>
+extern float x2, x3;
+__m128 foo1 (float x1, float x4) {
+ return _mm_set_ps (x2, x1, x3, x4);
+}
+
+gcc mainline compiles it to:
+
+foo1:
+ insertps $0x10, x2(%rip), %xmm0
+ insertps $0x10, x3(%rip), %xmm1
+ movaps %xmm1, %xmm2
+ movlhps %xmm0, %xmm2
+ movaps %xmm2, %xmm0
+ ret
+
+//===---------------------------------------------------------------------===//
+
+We compile vector multiply-by-constant into poor code:
+
+define <4 x i32> @f(<4 x i32> %i) nounwind {
+ %A = mul <4 x i32> %i, < i32 10, i32 10, i32 10, i32 10 >
+ ret <4 x i32> %A
+}
+
+On targets without SSE4.1, this compiles into:
+
+LCPI1_0: ## <4 x i32>
+ .long 10
+ .long 10
+ .long 10
+ .long 10
+ .text
+ .align 4,0x90
+ .globl _f
+_f:
+ pshufd $3, %xmm0, %xmm1
+ movd %xmm1, %eax
+ imull LCPI1_0+12, %eax
+ movd %eax, %xmm1
+ pshufd $1, %xmm0, %xmm2
+ movd %xmm2, %eax
+ imull LCPI1_0+4, %eax
+ movd %eax, %xmm2
+ punpckldq %xmm1, %xmm2
+ movd %xmm0, %eax
+ imull LCPI1_0, %eax
+ movd %eax, %xmm1
+ movhlps %xmm0, %xmm0
+ movd %xmm0, %eax
+ imull LCPI1_0+8, %eax
+ movd %eax, %xmm0
+ punpckldq %xmm0, %xmm1
+ movaps %xmm1, %xmm0
+ punpckldq %xmm2, %xmm0
+ ret
+
+It would be better to synthesize integer vector multiplication by constants
+using shifts and adds, pslld and paddd here. And even on targets with SSE4.1,
+simple cases such as multiplication by powers of two would be better as
+vector shifts than as multiplications.
+
+//===---------------------------------------------------------------------===//
+
+We compile this:
+
+__m128i
+foo2 (char x)
+{
+ return _mm_set_epi8 (1, 0, 0, 0, 0, 0, 0, 0, 0, x, 0, 1, 0, 0, 0, 0);
+}
+
+into:
+ movl $1, %eax
+ xorps %xmm0, %xmm0
+ pinsrw $2, %eax, %xmm0
+ movzbl 4(%esp), %eax
+ pinsrw $3, %eax, %xmm0
+ movl $256, %eax
+ pinsrw $7, %eax, %xmm0
+ ret
+
+
+gcc-4.2:
+ subl $12, %esp
+ movzbl 16(%esp), %eax
+ movdqa LC0, %xmm0
+ pinsrw $3, %eax, %xmm0
+ addl $12, %esp
+ ret
+ .const
+ .align 4
+LC0:
+ .word 0
+ .word 0
+ .word 1
+ .word 0
+ .word 0
+ .word 0
+ .word 0
+ .word 256
+
+With SSE4, it should be
+ movdqa .LC0(%rip), %xmm0
+ pinsrb $6, %edi, %xmm0
+
+//===---------------------------------------------------------------------===//
+
+We should transform a shuffle of two vectors of constants into a single vector
+of constants. Likewise, an insertelement of a constant into a vector of
+constants should result in a vector of constants; e.g. 2008-06-25-VecISelBug.ll.
+
+We compiled it to something horrible:
+
+ .align 4
+LCPI1_1: ## float
+ .long 1065353216 ## float 1
+ .const
+
+ .align 4
+LCPI1_0: ## <4 x float>
+ .space 4
+ .long 1065353216 ## float 1
+ .space 4
+ .long 1065353216 ## float 1
+ .text
+ .align 4,0x90
+ .globl _t
+_t:
+ xorps %xmm0, %xmm0
+ movhps LCPI1_0, %xmm0
+ movss LCPI1_1, %xmm1
+ movaps %xmm0, %xmm2
+ shufps $2, %xmm1, %xmm2
+ shufps $132, %xmm2, %xmm0
+ movaps %xmm0, 0
+
+//===---------------------------------------------------------------------===//
+rdar://5907648
+
+This function:
+
+float foo(unsigned char x) {
+ return x;
+}
+
+compiles to (x86-32):
+
+define float @foo(i8 zeroext %x) nounwind {
+ %tmp12 = uitofp i8 %x to float ; <float> [#uses=1]
+ ret float %tmp12
+}
+
+compiles to:
+
+_foo:
+ subl $4, %esp
+ movzbl 8(%esp), %eax
+ cvtsi2ss %eax, %xmm0
+ movss %xmm0, (%esp)
+ flds (%esp)
+ addl $4, %esp
+ ret
+
+We should be able to use:
+ cvtsi2ss 8(%esp), %xmm0
+since we know the stack slot is already zext'd.
+
+//===---------------------------------------------------------------------===//
+
+Consider using movlps instead of movsd to implement (scalar_to_vector (loadf64))
+when code size is critical. movlps is slower than movsd on core2 but it's one
+byte shorter.
+
+//===---------------------------------------------------------------------===//
+
+We should use a dynamic programming based approach to tell when using FPStack
+operations is cheaper than SSE. SciMark montecarlo contains code like this
+for example:
+
+double MonteCarlo_num_flops(int Num_samples) {
+ return ((double) Num_samples)* 4.0;
+}
+
+In fpstack mode, this compiles into:
+
+LCPI1_0:
+ .long 1082130432 ## float 4.000000e+00
+_MonteCarlo_num_flops:
+ subl $4, %esp
+ movl 8(%esp), %eax
+ movl %eax, (%esp)
+ fildl (%esp)
+ fmuls LCPI1_0
+ addl $4, %esp
+ ret
+
+in SSE mode, it compiles into significantly slower code:
+
+_MonteCarlo_num_flops:
+ subl $12, %esp
+ cvtsi2sd 16(%esp), %xmm0
+ mulsd LCPI1_0, %xmm0
+ movsd %xmm0, (%esp)
+ fldl (%esp)
+ addl $12, %esp
+ ret
+
+There are also other cases in scimark where using fpstack is better; for
+example, it is cheaper to do fld1 than to load from a constant pool, so
+"load, add 1.0, store" is better done on the fp stack, etc.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README-UNIMPLEMENTED.txt b/lib/Target/X86/README-UNIMPLEMENTED.txt
new file mode 100644
index 000000000000..69dc8ee1af7c
--- /dev/null
+++ b/lib/Target/X86/README-UNIMPLEMENTED.txt
@@ -0,0 +1,14 @@
+//===---------------------------------------------------------------------===//
+// Testcases that crash the X86 backend because they aren't implemented
+//===---------------------------------------------------------------------===//
+
+These are cases we know the X86 backend doesn't handle. Patches are welcome
+and appreciated, because no one has signed up to implement these yet.
+Implementing these would allow elimination of the corresponding intrinsics,
+which would be great.
+
+1) vector shifts
+2) vector comparisons
+3) vector fp<->int conversions: PR2683, PR2684, PR2685, PR2686, PR2688
+4) bitcasts from vectors to scalars: PR2804
+
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
new file mode 100644
index 000000000000..ad12137c8913
--- /dev/null
+++ b/lib/Target/X86/README-X86-64.txt
@@ -0,0 +1,251 @@
+//===- README-X86-64.txt - Notes for X86-64 code gen ----------------------===//
+
+Implement different PIC models? Right now we only support Mac OS X with the
+small PIC code model.
+
+//===---------------------------------------------------------------------===//
+
+For this:
+
+extern void xx(void);
+void bar(void) {
+ xx();
+}
+
+gcc compiles to:
+
+.globl _bar
+_bar:
+ jmp _xx
+
+We need to do the tailcall optimization as well.
+
+//===---------------------------------------------------------------------===//
+
+AMD64 Optimization Manual 8.2 has some nice information about optimizing integer
+multiplication by a constant. How much of it applies to Intel's X86-64
+implementation? There are definite trade-offs to consider: latency vs. register
+pressure vs. code size.
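+
+For example, a multiply by 9 needs no multiply instruction at all (sketch):
+
+int mul9(int x) { return (x << 3) + x; }  // one lea: leal (%rdi,%rdi,8), %eax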
+
+//===---------------------------------------------------------------------===//
+
+Are we better off using branches instead of cmove to implement FP to
+unsigned i64?
+
+_conv:
+ ucomiss LC0(%rip), %xmm0
+ cvttss2siq %xmm0, %rdx
+ jb L3
+ subss LC0(%rip), %xmm0
+ movabsq $-9223372036854775808, %rax
+ cvttss2siq %xmm0, %rdx
+ xorq %rax, %rdx
+L3:
+ movq %rdx, %rax
+ ret
+
+instead of
+
+_conv:
+ movss LCPI1_0(%rip), %xmm1
+ cvttss2siq %xmm0, %rcx
+ movaps %xmm0, %xmm2
+ subss %xmm1, %xmm2
+ cvttss2siq %xmm2, %rax
+ movabsq $-9223372036854775808, %rdx
+ xorq %rdx, %rax
+ ucomiss %xmm1, %xmm0
+ cmovb %rcx, %rax
+ ret
+
+Seems like the jb branch has a high likelihood of being taken. It would have
+saved a few instructions.
+
+//===---------------------------------------------------------------------===//
+
+Poor codegen:
+
+int X[2];
+int b;
+void test(void) {
+ memset(X, b, 2*sizeof(X[0]));
+}
+
+llc:
+ movq _b@GOTPCREL(%rip), %rax
+ movzbq (%rax), %rax
+ movq %rax, %rcx
+ shlq $8, %rcx
+ orq %rax, %rcx
+ movq %rcx, %rax
+ shlq $16, %rax
+ orq %rcx, %rax
+ movq %rax, %rcx
+ shlq $32, %rcx
+ movq _X@GOTPCREL(%rip), %rdx
+ orq %rax, %rcx
+ movq %rcx, (%rdx)
+ ret
+
+gcc:
+ movq _b@GOTPCREL(%rip), %rax
+ movabsq $72340172838076673, %rdx
+ movzbq (%rax), %rax
+ imulq %rdx, %rax
+ movq _X@GOTPCREL(%rip), %rdx
+ movq %rax, (%rdx)
+ ret
+
+//===---------------------------------------------------------------------===//
+
+The vararg function prologue can be further optimized. Currently all XMM
+registers are stored into the register save area. Most of these stores can be
+eliminated since the upper bound on the number of XMM registers used is passed
+in %al. gcc produces something like the following:
+
+ movzbl %al, %edx
+ leaq 0(,%rdx,4), %rax
+ leaq 4+L2(%rip), %rdx
+ leaq 239(%rsp), %rax
+ jmp *%rdx
+ movaps %xmm7, -15(%rax)
+ movaps %xmm6, -31(%rax)
+ movaps %xmm5, -47(%rax)
+ movaps %xmm4, -63(%rax)
+ movaps %xmm3, -79(%rax)
+ movaps %xmm2, -95(%rax)
+ movaps %xmm1, -111(%rax)
+ movaps %xmm0, -127(%rax)
+L2:
+
+It jumps over the movaps stores that are not needed. It's hard to see this
+being significant, as it adds 5 instructions (including an indirect branch) to
+avoid executing 0 to 8 stores in the function prologue.
+
+Perhaps we can optimize for the common case where no XMM registers are used
+for parameter passing, i.e. if %al == 0, jump over all the stores. Or, in the
+case of a leaf function where we can determine that no XMM input parameter is
+needed, avoid emitting the stores at all.
+
+//===---------------------------------------------------------------------===//
+
+AMD64 has a complex calling convention for aggregate passing by value:
+
+1. If the size of an object is larger than two eightbytes, or in C++, is a non-
+ POD structure or union type, or contains unaligned fields, it has class
+ MEMORY.
+2. Both eightbytes get initialized to class NO_CLASS.
+3. Each field of an object is classified recursively so that always two fields
+ are considered. The resulting class is calculated according to the classes
+ of the fields in the eightbyte:
+ (a) If both classes are equal, this is the resulting class.
+ (b) If one of the classes is NO_CLASS, the resulting class is the other
+ class.
+ (c) If one of the classes is MEMORY, the result is the MEMORY class.
+ (d) If one of the classes is INTEGER, the result is the INTEGER.
+ (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, MEMORY is used as
+ class.
+ (f) Otherwise class SSE is used.
+4. Then a post merger cleanup is done:
+ (a) If one of the classes is MEMORY, the whole argument is passed in memory.
+ (b) If SSEUP is not preceded by SSE, it is converted to SSE.
+
+Currently llvm frontend does not handle this correctly.
+
+Problem 1:
+ typedef struct { int i; double d; } QuadWordS;
+It is currently passed in two i64 integer registers. However, a gcc-compiled
+callee expects the second element 'd' to be passed in XMM0.
+
+Problem 2:
+ typedef struct { int32_t i; float j; double d; } QuadWordS;
+The size of the first two fields == i64, so they will be combined and passed in
+an integer register (RDI). The third field is still passed in XMM0.
+
+Problem 3:
+ typedef struct { int64_t i; int8_t j; int64_t d; } S;
+ void test(S s)
+The size of this aggregate is greater than two i64, so it should be passed in
+memory. Currently llvm breaks this down and passes it in three integer
+registers.
+
+Problem 4:
+Taking problem 3 one step further, a function expects an aggregate value
+in memory followed by more parameter(s) passed in register(s):
+ void test(S s, int b)
+
+LLVM IR does not allow passing aggregates as parameters, so the frontend must
+break the aggregate value (in problems 3 and 4) into a number of scalar values:
+ void %test(long %s.i, byte %s.j, long %s.d);
+
+However, if the backend were to lower this code literally, it would pass the 3
+values in integer registers. To force it to be passed in memory, the frontend
+should change the function signature to:
+ void %test(long %undef1, long %undef2, long %undef3, long %undef4,
+ long %undef5, long %undef6,
+ long %s.i, byte %s.j, long %s.d);
+And the caller would look something like this:
+ call void %test( undef, undef, undef, undef, undef, undef,
+ %tmp.s.i, %tmp.s.j, %tmp.s.d );
+The first 6 undef parameters would exhaust the 6 integer registers used for
+parameter passing. The following three integer values would then be forced into
+memory.
+
+For problem 4, the parameter 'd' would be moved to the front of the parameter
+list so it will be passed in a register:
+ void %test(int %d,
+ long %undef1, long %undef2, long %undef3, long %undef4,
+ long %undef5, long %undef6,
+ long %s.i, byte %s.j, long %s.d);
+
+//===---------------------------------------------------------------------===//
+
+Right now the asm printer assumes GlobalAddresses are accessed via RIP-relative
+addressing. Therefore, it is not possible to generate this:
+ movabsq $__ZTV10polynomialIdE+16, %rax
+
+That is ok for now since we currently only support the small code model, so
+the above is selected as:
+ leaq __ZTV10polynomialIdE+16(%rip), %rax
+
+This is probably slightly slower but is much shorter than movabsq. However, if
+we were to support medium or larger code models, we would need to use the
+movabs instruction. We should probably introduce something like AbsoluteAddress to
+distinguish it from GlobalAddress so the asm printer and JIT code emitter can
+do the right thing.
+
+//===---------------------------------------------------------------------===//
+
+It's not possible to reference the AH, BH, CH, and DH registers in an
+instruction requiring a REX prefix. However, divb and mulb both produce results
+in AH. If isel emitted a CopyFromReg of AH, it would get turned into a movb
+whose destination could be allocated to r8b - r15b, and such a movb cannot be
+encoded (it would need a REX prefix).
+
+To get around this, isel emits a CopyFromReg from AX, then shifts it right
+by 8 and truncates it. It's not pretty but it works. We need some register
+allocation magic to make the hack go away (e.g. putting additional constraints
+on the result of the movb).
+
+//===---------------------------------------------------------------------===//
+
+The x86-64 ABI for hidden-argument struct returns requires that the
+incoming value of %rdi be copied into %rax by the callee upon return.
+
+The idea is that it saves callers from having to remember this value,
+which would often require a callee-saved register. Callees usually
+need to keep this value live for most of their body anyway, so it
+doesn't add a significant burden on them.
+
+We currently implement this in codegen, however this is suboptimal
+because it means that it would be quite awkward to implement the
+optimization for callers.
+
+A better implementation would be to relax the LLVM IR rules for sret
+arguments to allow a function with an sret argument to have a non-void
+return type, and to have the front-end set up the sret argument value
+as the return value of the function. The front-end could more easily
+emit uses of the returned struct value to be in terms of the function's
+lowered return value, and it would free non-C frontends from a
+complication only required by a C-based ABI.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
new file mode 100644
index 000000000000..710bd0357433
--- /dev/null
+++ b/lib/Target/X86/README.txt
@@ -0,0 +1,1899 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend.
+//===---------------------------------------------------------------------===//
+
+We should add support for the "movbe" instruction, which does a byte-swapping
+copy (3-addr bswap + memory support?). This is available on Atom processors.
+
+//===---------------------------------------------------------------------===//
+
+CodeGen/X86/lea-3.ll:test3 should be a single LEA, not a shift/move. The X86
+backend knows how to three-addressify this shift, but it appears the register
+allocator isn't even asking it to do so in this case. We should investigate
+why this isn't happening; it could have a significant impact on other important
+cases for X86 as well.
+
+//===---------------------------------------------------------------------===//
+
+This should be one DIV/IDIV instruction, not a libcall:
+
+unsigned test(unsigned long long X, unsigned Y) {
+ return X/Y;
+}
+
+This can be done trivially with a custom legalizer. What about overflow
+though? http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14224
+
+//===---------------------------------------------------------------------===//
+
+Improvements to the multiply -> shift/add algorithm:
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg01590.html
+
+//===---------------------------------------------------------------------===//
+
+Improve code like this (occurs fairly frequently, e.g. in LLVM):
+long long foo(int x) { return 1LL << x; }
+
+http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01109.html
+http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01128.html
+http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01136.html
+
+Another useful one would be ~0ULL >> X and ~0ULL << X.
+
+One better solution for 1LL << x is:
+ xorl %eax, %eax
+ xorl %edx, %edx
+ testb $32, %cl
+ sete %al
+ setne %dl
+ sall %cl, %eax
+ sall %cl, %edx
+
+But that requires good 8-bit subreg support.
+
+Also, this might be better. It's an extra shift, but it's one instruction
+shorter, and doesn't stress 8-bit subreg support.
+(From http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01148.html,
+but without the unnecessary and.)
+ movl %ecx, %eax
+ shrl $5, %eax
+ movl %eax, %edx
+ xorl $1, %edx
+ sall %cl, %eax
+ sall %cl, %edx
+
+64-bit shifts (in general) expand to really bad code. Instead of using
+cmovs, we should expand to a conditional branch like GCC produces.
+
+//===---------------------------------------------------------------------===//
+
+Compile this:
+_Bool f(_Bool a) { return a!=1; }
+
+into:
+ movzbl %dil, %eax
+ xorl $1, %eax
+ ret
+
+(Although note that this isn't a legal way to express the code that llvm-gcc
+currently generates for that function.)
+
+//===---------------------------------------------------------------------===//
+
+Some isel ideas:
+
+1. Dynamic programming based approach when compile time is not an
+ issue.
+2. Code duplication (addressing mode) during isel.
+3. Other ideas from "Register-Sensitive Selection, Duplication, and
+ Sequencing of Instructions".
+4. Scheduling for reduced register pressure. E.g. "Minimum Register
+ Instruction Sequence Problem: Revisiting Optimal Code Generation for DAGs"
+ and other related papers.
+ http://citeseer.ist.psu.edu/govindarajan01minimum.html
+
+//===---------------------------------------------------------------------===//
+
+Should we promote i16 to i32 to avoid partial register update stalls?
+
+//===---------------------------------------------------------------------===//
+
+Leave any_extend as pseudo instruction and hint to register
+allocator. Delay codegen until post register allocation.
+Note. any_extend is now turned into an INSERT_SUBREG. We still need to teach
+the coalescer how to deal with it though.
+
+//===---------------------------------------------------------------------===//
+
+It appears icc uses push for parameter passing. Need to investigate.
+
+//===---------------------------------------------------------------------===//
+
+Only use inc/neg/not instructions on processors where they are faster than
+add/sub/xor. They are slower on the P4 due to only updating some processor
+flags.
+
+//===---------------------------------------------------------------------===//
+
+The instruction selector sometimes misses folding a load into a compare. The
+pattern is written as (cmp reg, (load p)). Because the compare isn't
+commutative, it is not matched with the load on both sides. The dag combiner
+should be made smart enough to canonicalize the load into the RHS of a compare
+when it can invert the result of the compare for free.
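+
+A hypothetical case (sketch):
+
+int f(int *p, int x) { return *p < x; }
+
+Here the load naturally ends up on the LHS of the compare, which the
+(cmp reg, (load p)) pattern does not cover; inverting the condition would let
+the load be folded.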
+
+//===---------------------------------------------------------------------===//
+
+How about intrinsics? An example is:
+ *res = _mm_mulhi_epu16(*A, _mm_mul_epu32(*B, *C));
+
+compiles to
+ pmuludq (%eax), %xmm0
+ movl 8(%esp), %eax
+ movdqa (%eax), %xmm1
+ pmulhuw %xmm0, %xmm1
+
+The transformation probably requires an X86-specific pass or a target-specific
+DAG combiner hook.
+
+//===---------------------------------------------------------------------===//
+
+In many cases, LLVM generates code like this:
+
+_test:
+ movl 8(%esp), %eax
+ cmpl %eax, 4(%esp)
+ setl %al
+ movzbl %al, %eax
+ ret
+
+On some processors (which ones?), it is more efficient to do this:
+
+_test:
+ movl 8(%esp), %ebx
+ xor %eax, %eax
+ cmpl %ebx, 4(%esp)
+ setl %al
+ ret
+
+Doing this correctly is tricky though, as the xor clobbers the flags.
+
+//===---------------------------------------------------------------------===//
+
+We should generate bts/btr/etc instructions on targets where they are cheap or
+when codesize is important. e.g., for:
+
+void setbit(int *target, int bit) {
+ *target |= (1 << bit);
+}
+void clearbit(int *target, int bit) {
+ *target &= ~(1 << bit);
+}
+
+//===---------------------------------------------------------------------===//
+
+Instead of the following for a memset(char*, 1, 10):
+
+ movl $16843009, 4(%edx)
+ movl $16843009, (%edx)
+ movw $257, 8(%edx)
+
+It might be better to generate
+
+ movl $16843009, %eax
+ movl %eax, 4(%edx)
+ movl %eax, (%edx)
+ movw %ax, 8(%edx)
+
+when we can spare a register. It reduces code size.
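+
+A testcase for this would be something like (assuming the usual builtin):
+
+void set_ten(char *p) { __builtin_memset(p, 1, 10); }
+
+i.e. a fixed-size memset of a non-zero byte that is small enough to be
+expanded inline.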
+
+//===---------------------------------------------------------------------===//
+
+Evaluate what the best way to codegen sdiv X, (2^C) is. For X/8, we currently
+get this:
+
+define i32 @test1(i32 %X) {
+ %Y = sdiv i32 %X, 8
+ ret i32 %Y
+}
+
+_test1:
+ movl 4(%esp), %eax
+ movl %eax, %ecx
+ sarl $31, %ecx
+ shrl $29, %ecx
+ addl %ecx, %eax
+ sarl $3, %eax
+ ret
+
+GCC knows several different ways to codegen it, one of which is this:
+
+_test1:
+ movl 4(%esp), %eax
+ cmpl $-1, %eax
+ leal 7(%eax), %ecx
+ cmovle %ecx, %eax
+ sarl $3, %eax
+ ret
+
+which is probably slower, but it's interesting at least :)
+
+//===---------------------------------------------------------------------===//
+
+We are currently lowering large (1MB+) memmove/memcpy to rep/stosl and rep/movsl.
+We should leave these as libcalls for everything over a much lower threshold,
+since libc is hand tuned for medium and large mem ops (avoiding RFO for large
+stores, TLB preheating, etc.).
+
+//===---------------------------------------------------------------------===//
+
+Optimize this into something reasonable:
+ x * copysign(1.0, y) * copysign(1.0, z)
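+
+As a standalone testcase (using the C99 copysign from <math.h>; the name is
+only for illustration):
+
+#include <math.h>
+double sign_prod(double x, double y, double z) {
+  return x * copysign(1.0, y) * copysign(1.0, z);
+}
+
+One "reasonable" form would be to flip the sign bit of x by the xor of the
+sign bits of y and z, instead of doing two multiplies.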
+
+//===---------------------------------------------------------------------===//
+
+Optimize copysign(x, *y) to use an integer load from y.
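+
+A rough sketch of the idea in C (assuming IEEE-754 doubles; the helper name is
+only for illustration):
+
+#include <string.h>
+double copysign_intload(double x, const double *y) {
+  unsigned long long xb, yb;
+  memcpy(&xb, &x, sizeof xb);
+  memcpy(&yb, y, sizeof yb);       /* integer load of *y; no FP load needed */
+  xb = (xb & ~(1ULL << 63)) | (yb & (1ULL << 63));
+  memcpy(&x, &xb, sizeof x);
+  return x;
+}
+
+Only the sign bit of *y is needed, so an integer load and mask avoids touching
+the FP/SSE unit for y at all.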
+
+//===---------------------------------------------------------------------===//
+
+The following tests perform worse with LSR:
+
+lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesor.
+
+//===---------------------------------------------------------------------===//
+
+Teach the coalescer to coalesce vregs of different register classes. e.g. FR32 /
+FR64 to VR128.
+
+//===---------------------------------------------------------------------===//
+
+Adding to the list of cmp / test poor codegen issues:
+
+int test(__m128 *A, __m128 *B) {
+ if (_mm_comige_ss(*A, *B))
+ return 3;
+ else
+ return 4;
+}
+
+_test:
+ movl 8(%esp), %eax
+ movaps (%eax), %xmm0
+ movl 4(%esp), %eax
+ movaps (%eax), %xmm1
+ comiss %xmm0, %xmm1
+ setae %al
+ movzbl %al, %ecx
+ movl $3, %eax
+ movl $4, %edx
+ cmpl $0, %ecx
+ cmove %edx, %eax
+ ret
+
+Note the setae, movzbl, cmpl, cmove can be replaced with a single cmovae. There
+are a number of issues. 1) We are introducing a setcc between the result of the
+intrinsic call and the select. 2) The intrinsic is expected to produce an i32
+value, so an any_extend (which becomes a zero extend) is added.
+
+We probably need some kind of target DAG combine hook to fix this.
+
+//===---------------------------------------------------------------------===//
+
+We generate significantly worse code for this than GCC:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=21150
+http://gcc.gnu.org/bugzilla/attachment.cgi?id=8701
+
+There is also one case we do worse on PPC.
+
+//===---------------------------------------------------------------------===//
+
+For this:
+
+int test(int a)
+{
+ return a * 3;
+}
+
+We currently emit
+ imull $3, 4(%esp), %eax
+
+Perhaps this is what we really should generate? Is imull three or four
+cycles? Note: ICC generates this:
+ movl 4(%esp), %eax
+ leal (%eax,%eax,2), %eax
+
+The current instruction priority is based on pattern complexity. The former is
+more "complex" because it folds a load so the latter will not be emitted.
+
+Perhaps we should use AddedComplexity to give LEA32r a higher priority? We
+should always try to match LEA first since the LEA matching code does some
+estimate to determine whether the match is profitable.
+
+However, if we care more about code size, then imull is better. It's two bytes
+shorter than movl + leal.
+
+On a Pentium M, both variants have the same characteristics with regard
+to throughput; however, the multiplication has a latency of four cycles, as
+opposed to two cycles for the movl+lea variant.
+
+//===---------------------------------------------------------------------===//
+
+__builtin_ffs codegen is messy.
+
+int ffs_(unsigned X) { return __builtin_ffs(X); }
+
+llvm produces:
+ffs_:
+ movl 4(%esp), %ecx
+ bsfl %ecx, %eax
+ movl $32, %edx
+ cmove %edx, %eax
+ incl %eax
+ xorl %edx, %edx
+ testl %ecx, %ecx
+ cmove %edx, %eax
+ ret
+
+vs gcc:
+
+_ffs_:
+ movl $-1, %edx
+ bsfl 4(%esp), %eax
+ cmove %edx, %eax
+ addl $1, %eax
+ ret
+
+Another example of __builtin_ffs (use predsimplify to eliminate a select):
+
+int foo (unsigned long j) {
+ if (j)
+ return __builtin_ffs (j) - 1;
+ else
+ return 0;
+}
+
+//===---------------------------------------------------------------------===//
+
+It appears gcc places string data with linkonce linkage in
+.section __TEXT,__const_coal,coalesced instead of
+.section __DATA,__const_coal,coalesced.
+Take a look at darwin.h; there are other Darwin assembler directives that we
+do not make use of.
+
+//===---------------------------------------------------------------------===//
+
+define i32 @foo(i32* %a, i32 %t) {
+entry:
+ br label %cond_true
+
+cond_true: ; preds = %cond_true, %entry
+ %x.0.0 = phi i32 [ 0, %entry ], [ %tmp9, %cond_true ] ; <i32> [#uses=3]
+ %t_addr.0.0 = phi i32 [ %t, %entry ], [ %tmp7, %cond_true ] ; <i32> [#uses=1]
+ %tmp2 = getelementptr i32* %a, i32 %x.0.0 ; <i32*> [#uses=1]
+ %tmp3 = load i32* %tmp2 ; <i32> [#uses=1]
+ %tmp5 = add i32 %t_addr.0.0, %x.0.0 ; <i32> [#uses=1]
+ %tmp7 = add i32 %tmp5, %tmp3 ; <i32> [#uses=2]
+ %tmp9 = add i32 %x.0.0, 1 ; <i32> [#uses=2]
+ %tmp = icmp sgt i32 %tmp9, 39 ; <i1> [#uses=1]
+ br i1 %tmp, label %bb12, label %cond_true
+
+bb12: ; preds = %cond_true
+ ret i32 %tmp7
+}
+is pessimized by -loop-reduce and -indvars
+
+//===---------------------------------------------------------------------===//
+
+u32 to float conversion improvement:
+
+float uint32_2_float( unsigned u ) {
+ float fl = (int) (u & 0xffff);
+ float fh = (int) (u >> 16);
+ fh *= 0x1.0p16f;
+ return fh + fl;
+}
+
+00000000 subl $0x04,%esp
+00000003 movl 0x08(%esp,1),%eax
+00000007 movl %eax,%ecx
+00000009 shrl $0x10,%ecx
+0000000c cvtsi2ss %ecx,%xmm0
+00000010 andl $0x0000ffff,%eax
+00000015 cvtsi2ss %eax,%xmm1
+00000019 mulss 0x00000078,%xmm0
+00000021 addss %xmm1,%xmm0
+00000025 movss %xmm0,(%esp,1)
+0000002a flds (%esp,1)
+0000002d addl $0x04,%esp
+00000030 ret
+
+//===---------------------------------------------------------------------===//
+
+When using the fastcc ABI, align stack slots of arguments of type double on an
+8-byte boundary to improve performance.
+
+//===---------------------------------------------------------------------===//
+
+Codegen:
+
+int f(int a, int b) {
+ if (a == 4 || a == 6)
+ b++;
+ return b;
+}
+
+
+as:
+
+or eax, 2
+cmp eax, 6
+jz label
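+
+In source terms, the transformation is roughly (a sketch, not what we would
+literally emit):
+
+int f_transformed(int a, int b) {
+  return b + ((a | 2) == 6);      /* a == 4 || a == 6  <=>  (a | 2) == 6 */
+}
+
+Or-ing in the one bit that differs between 4 and 6 collapses the two compares
+into one.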
+
+//===---------------------------------------------------------------------===//
+
+GCC's ix86_expand_int_movcc function (in i386.c) has a ton of interesting
+simplifications for integer "x cmp y ? a : b". For example, instead of:
+
+int G;
+void f(int X, int Y) {
+ G = X < 0 ? 14 : 13;
+}
+
+compiling to:
+
+_f:
+ movl $14, %eax
+ movl $13, %ecx
+ movl 4(%esp), %edx
+ testl %edx, %edx
+ cmovl %eax, %ecx
+ movl %ecx, _G
+ ret
+
+it could be:
+_f:
+ movl 4(%esp), %eax
+ sarl $31, %eax
+ notl %eax
+ addl $14, %eax
+ movl %eax, _G
+ ret
+
+etc.
+
+Another is:
+int usesbb(unsigned int a, unsigned int b) {
+ return (a < b ? -1 : 0);
+}
+to:
+_usesbb:
+ movl 8(%esp), %eax
+ cmpl %eax, 4(%esp)
+ sbbl %eax, %eax
+ ret
+
+instead of:
+_usesbb:
+ xorl %eax, %eax
+ movl 8(%esp), %ecx
+ cmpl %ecx, 4(%esp)
+ movl $4294967295, %ecx
+ cmovb %ecx, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+Currently we don't have elimination of redundant stack manipulations. Consider
+the code:
+
+int %main() {
+entry:
+ call fastcc void %test1( )
+ call fastcc void %test2( sbyte* cast (void ()* %test1 to sbyte*) )
+ ret int 0
+}
+
+declare fastcc void %test1()
+
+declare fastcc void %test2(sbyte*)
+
+
+This currently compiles to:
+
+ subl $16, %esp
+ call _test1
+ addl $12, %esp
+ subl $16, %esp
+ movl $_test1, (%esp)
+ call _test2
+ addl $12, %esp
+
+The addl/subl pair between the two calls is really unneeded here.
+
+//===---------------------------------------------------------------------===//
+
+Consider the expansion of:
+
+define i32 @test3(i32 %X) {
+ %tmp1 = urem i32 %X, 255
+ ret i32 %tmp1
+}
+
+Currently it compiles to:
+
+...
+ movl $2155905153, %ecx
+ movl 8(%esp), %esi
+ movl %esi, %eax
+ mull %ecx
+...
+
+This could be "reassociated" into:
+
+ movl $2155905153, %eax
+ movl 8(%esp), %ecx
+ mull %ecx
+
+to avoid the copy. In fact, the existing two-address stuff would do this
+except that mul isn't a commutative 2-addr instruction. I guess this has
+to be done at isel time based on the #uses of the mul?
+
+//===---------------------------------------------------------------------===//
+
+Make sure the instruction which starts a loop does not cross a cacheline
+boundary. This requires knowing the exact length of each machine instruction.
+That is somewhat complicated, but doable. Example 256.bzip2:
+
+In the new trace, the hot loop has an instruction which crosses a cacheline
+boundary. In addition to potential cache misses, this can't help decoding as I
+imagine there has to be some kind of complicated decoder reset and realignment
+to grab the bytes from the next cacheline.
+
+532  532 0x3cfc movb 1809(%esp, %esi), %bl   <<<--- spans 2 64 byte lines
+942  942 0x3d03 movl %dh, 1809(%esp, %esi)
+937 937 0x3d0a incl %esi
+3 3 0x3d0b cmpb %bl, %dl
+27 27 0x3d0d jnz 0x000062db <main+11707>
+
+//===---------------------------------------------------------------------===//
+
+In C99 mode, the preprocessor doesn't like assembly comments like #TRUNCATE.
+
+//===---------------------------------------------------------------------===//
+
+This could be a single 16-bit load.
+
+int f(char *p) {
+ if ((p[0] == 1) & (p[1] == 2)) return 1;
+ return 0;
+}
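+
+A C sketch of the combined form (assuming a little-endian target and that the
+possibly-unaligned 16-bit load is acceptable; the name is only for
+illustration):
+
+#include <string.h>
+int f_one_load(char *p) {
+  unsigned short v;
+  memcpy(&v, p, sizeof v);        /* the single 16-bit load */
+  return v == 0x0201;             /* p[0] == 1 in the low byte, p[1] == 2 above */
+}
+
+i.e. one 16-bit load and one compare against 0x0201.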
+
+//===---------------------------------------------------------------------===//
+
+We should inline lrintf and probably other libc functions.
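+
+For example, on SSE targets lrintf is essentially a single cvtss2si. A rough
+sketch of the inlined form via the intrinsic (assuming the default rounding
+mode is what the caller wants; the name is only for illustration):
+
+#include <xmmintrin.h>
+long lrintf_inline(float x) {
+  return _mm_cvtss_si32(_mm_set_ss(x));  /* cvtss2si; rounds according to MXCSR */
+}
+
+A similar expansion works for lrint on SSE2 targets (cvtsd2si).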
+
+//===---------------------------------------------------------------------===//
+
+Start using the flags more. For example, compile:
+
+int add_zf(int *x, int y, int a, int b) {
+ if ((*x += y) == 0)
+ return a;
+ else
+ return b;
+}
+
+to:
+ addl %esi, (%rdi)
+ movl %edx, %eax
+ cmovne %ecx, %eax
+ ret
+instead of:
+
+_add_zf:
+ addl (%rdi), %esi
+ movl %esi, (%rdi)
+ testl %esi, %esi
+ cmove %edx, %ecx
+ movl %ecx, %eax
+ ret
+
+and:
+
+int add_zf(int *x, int y, int a, int b) {
+ if ((*x + y) < 0)
+ return a;
+ else
+ return b;
+}
+
+to:
+
+add_zf:
+ addl (%rdi), %esi
+ movl %edx, %eax
+ cmovns %ecx, %eax
+ ret
+
+instead of:
+
+_add_zf:
+ addl (%rdi), %esi
+ testl %esi, %esi
+ cmovs %edx, %ecx
+ movl %ecx, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+These two functions have identical effects:
+
+unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return i;}
+unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;}
+
+We currently compile them to:
+
+_f:
+ movl 4(%esp), %eax
+ movl %eax, %ecx
+ incl %ecx
+ movl 8(%esp), %edx
+ cmpl %edx, %ecx
+ jne LBB1_2 #UnifiedReturnBlock
+LBB1_1: #cond_true
+ addl $2, %eax
+ ret
+LBB1_2: #UnifiedReturnBlock
+ movl %ecx, %eax
+ ret
+_f2:
+ movl 4(%esp), %eax
+ movl %eax, %ecx
+ incl %ecx
+ cmpl 8(%esp), %ecx
+ sete %cl
+ movzbl %cl, %ecx
+ leal 1(%ecx,%eax), %eax
+ ret
+
+both of which are inferior to GCC's:
+
+_f:
+ movl 4(%esp), %edx
+ leal 1(%edx), %eax
+ addl $2, %edx
+ cmpl 8(%esp), %eax
+ cmove %edx, %eax
+ ret
+_f2:
+ movl 4(%esp), %eax
+ addl $1, %eax
+ xorl %edx, %edx
+ cmpl 8(%esp), %eax
+ sete %dl
+ addl %edx, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+void test(int X) {
+ if (X) abort();
+}
+
+is currently compiled to:
+
+_test:
+ subl $12, %esp
+ cmpl $0, 16(%esp)
+ jne LBB1_1
+ addl $12, %esp
+ ret
+LBB1_1:
+ call L_abort$stub
+
+It would be better to produce:
+
+_test:
+ subl $12, %esp
+ cmpl $0, 16(%esp)
+ jne L_abort$stub
+ addl $12, %esp
+ ret
+
+This can be applied to any no-return function call that takes no arguments etc.
+Alternatively, the stack save/restore logic could be shrink-wrapped, producing
+something like this:
+
+_test:
+ cmpl $0, 4(%esp)
+ jne LBB1_1
+ ret
+LBB1_1:
+ subl $12, %esp
+ call L_abort$stub
+
+Both are useful in different situations. Finally, it could be shrink-wrapped
+and tail called, like this:
+
+_test:
+ cmpl $0, 4(%esp)
+ jne LBB1_1
+ ret
+LBB1_1:
+ pop %eax # realign stack.
+ call L_abort$stub
+
+Though this probably isn't worth it.
+
+//===---------------------------------------------------------------------===//
+
+We need to teach the codegen to convert two-address INC instructions to LEA
+when the flags are dead (likewise dec). For example, on X86-64, compile:
+
+int foo(int A, int B) {
+ return A+1;
+}
+
+to:
+
+_foo:
+ leal 1(%edi), %eax
+ ret
+
+instead of:
+
+_foo:
+ incl %edi
+ movl %edi, %eax
+ ret
+
+Another example is:
+
+;; X's live range extends beyond the shift, so the register allocator
+;; cannot coalesce it with Y. Because of this, a copy needs to be
+;; emitted before the shift to save the register value before it is
+;; clobbered. However, this copy is not needed if the register
+;; allocator turns the shift into an LEA. This also occurs for ADD.
+
+; Check that the shift gets turned into an LEA.
+; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: not grep {mov E.X, E.X}
+
+@G = external global i32 ; <i32*> [#uses=3]
+
+define i32 @test1(i32 %X, i32 %Y) {
+ %Z = add i32 %X, %Y ; <i32> [#uses=1]
+ volatile store i32 %Y, i32* @G
+ volatile store i32 %Z, i32* @G
+ ret i32 %X
+}
+
+define i32 @test2(i32 %X) {
+ %Z = add i32 %X, 1 ; <i32> [#uses=1]
+ volatile store i32 %Z, i32* @G
+ ret i32 %X
+}
+
+//===---------------------------------------------------------------------===//
+
+Sometimes it is better to codegen subtractions from a constant (e.g. 7-x) with
+a neg instead of a sub instruction. Consider:
+
+int test(char X) { return 7-X; }
+
+we currently produce:
+_test:
+ movl $7, %eax
+ movsbl 4(%esp), %ecx
+ subl %ecx, %eax
+ ret
+
+We would use one fewer register if codegen'd as:
+
+ movsbl 4(%esp), %eax
+ neg %eax
+ add $7, %eax
+ ret
+
+Note that this isn't beneficial if the load can be folded into the sub. In
+this case, we want a sub:
+
+int test(int X) { return 7-X; }
+_test:
+ movl $7, %eax
+ subl 4(%esp), %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+Leaf functions that require one 4-byte spill slot have a prolog like this:
+
+_foo:
+ pushl %esi
+ subl $4, %esp
+...
+and an epilog like this:
+ addl $4, %esp
+ popl %esi
+ ret
+
+It would be smaller, and potentially faster, to push eax on entry and to
+pop into a dummy register instead of using addl/subl of esp. Just don't pop
+into any return registers :)
+
+//===---------------------------------------------------------------------===//
+
+The X86 backend should fold (branch (or (setcc, setcc))) into multiple
+branches. We generate really poor code for:
+
+double testf(double a) {
+ return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0);
+}
+
+For example, the entry BB is:
+
+_testf:
+ subl $20, %esp
+ pxor %xmm0, %xmm0
+ movsd 24(%esp), %xmm1
+ ucomisd %xmm0, %xmm1
+ setnp %al
+ sete %cl
+ testb %cl, %al
+ jne LBB1_5 # UnifiedReturnBlock
+LBB1_1: # cond_true
+
+
+It would be better to replace the last four instructions with:
+
+ jp LBB1_1
+ je LBB1_5
+LBB1_1:
+
+We also codegen the inner ?: into a diamond:
+
+ cvtss2sd LCPI1_0(%rip), %xmm2
+ cvtss2sd LCPI1_1(%rip), %xmm3
+ ucomisd %xmm1, %xmm0
+ ja LBB1_3 # cond_true
+LBB1_2: # cond_true
+ movapd %xmm3, %xmm2
+LBB1_3: # cond_true
+ movapd %xmm2, %xmm0
+ ret
+
+We should sink the load into %xmm3 down into the LBB1_2 block. This should
+be pretty easy, and will nuke all the copies.
+
+//===---------------------------------------------------------------------===//
+
+This:
+ #include <algorithm>
+ inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b)
+ { return std::make_pair(a + b, a + b < a); }
+ bool no_overflow(unsigned a, unsigned b)
+ { return !full_add(a, b).second; }
+
+Should compile to:
+
+
+ _Z11no_overflowjj:
+ addl %edi, %esi
+ setae %al
+ ret
+
+FIXME: That code looks wrong; bool return is normally defined as zext.
+
+on x86-64, not:
+
+__Z11no_overflowjj:
+ addl %edi, %esi
+ cmpl %edi, %esi
+ setae %al
+ movzbl %al, %eax
+ ret
+
+
+//===---------------------------------------------------------------------===//
+
+Re-materialize MOV32r0 etc. with xor instead of changing them to moves if the
+condition register is dead. xor reg reg is shorter than mov reg, #0.
+
+//===---------------------------------------------------------------------===//
+
+We aren't matching RMW instructions aggressively
+enough. Here's a reduced testcase (more in PR1160):
+
+define void @test(i32* %huge_ptr, i32* %target_ptr) {
+ %A = load i32* %huge_ptr ; <i32> [#uses=1]
+ %B = load i32* %target_ptr ; <i32> [#uses=1]
+ %C = or i32 %A, %B ; <i32> [#uses=1]
+ store i32 %C, i32* %target_ptr
+ ret void
+}
+
+$ llvm-as < t.ll | llc -march=x86-64
+
+_test:
+ movl (%rdi), %eax
+ orl (%rsi), %eax
+ movl %eax, (%rsi)
+ ret
+
+That should be something like:
+
+_test:
+ movl (%rdi), %eax
+ orl %eax, (%rsi)
+ ret
+
+//===---------------------------------------------------------------------===//
+
+The following code:
+
+bb114.preheader: ; preds = %cond_next94
+ %tmp231232 = sext i16 %tmp62 to i32 ; <i32> [#uses=1]
+ %tmp233 = sub i32 32, %tmp231232 ; <i32> [#uses=1]
+ %tmp245246 = sext i16 %tmp65 to i32 ; <i32> [#uses=1]
+ %tmp252253 = sext i16 %tmp68 to i32 ; <i32> [#uses=1]
+ %tmp254 = sub i32 32, %tmp252253 ; <i32> [#uses=1]
+ %tmp553554 = bitcast i16* %tmp37 to i8* ; <i8*> [#uses=2]
+ %tmp583584 = sext i16 %tmp98 to i32 ; <i32> [#uses=1]
+ %tmp585 = sub i32 32, %tmp583584 ; <i32> [#uses=1]
+ %tmp614615 = sext i16 %tmp101 to i32 ; <i32> [#uses=1]
+ %tmp621622 = sext i16 %tmp104 to i32 ; <i32> [#uses=1]
+ %tmp623 = sub i32 32, %tmp621622 ; <i32> [#uses=1]
+ br label %bb114
+
+produces:
+
+LBB3_5: # bb114.preheader
+ movswl -68(%ebp), %eax
+ movl $32, %ecx
+ movl %ecx, -80(%ebp)
+ subl %eax, -80(%ebp)
+ movswl -52(%ebp), %eax
+ movl %ecx, -84(%ebp)
+ subl %eax, -84(%ebp)
+ movswl -70(%ebp), %eax
+ movl %ecx, -88(%ebp)
+ subl %eax, -88(%ebp)
+ movswl -50(%ebp), %eax
+ subl %eax, %ecx
+ movl %ecx, -76(%ebp)
+ movswl -42(%ebp), %eax
+ movl %eax, -92(%ebp)
+ movswl -66(%ebp), %eax
+ movl %eax, -96(%ebp)
+ movw $0, -98(%ebp)
+
+This appears to be bad because the RA is not folding the store to the stack
+slot into the movl. The above instructions could be:
+ movl $32, -80(%ebp)
+...
+ movl $32, -84(%ebp)
+...
+This seems like a cross between remat and spill folding.
+
+This has redundant subtractions of %eax from a stack slot. However, %ecx doesn't
+change, so we could simply subtract %eax from %ecx first and then use %ecx (or
+vice-versa).
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+ %tmp659 = icmp slt i16 %tmp654, 0 ; <i1> [#uses=1]
+ br i1 %tmp659, label %cond_true662, label %cond_next715
+
+produces this:
+
+ testw %cx, %cx
+ movswl %cx, %esi
+ jns LBB4_109 # cond_next715
+
+Shark tells us that using %cx in the testw instruction is sub-optimal. It
+suggests using the 32-bit register (which is what ICC uses).
+
+//===---------------------------------------------------------------------===//
+
+We compile this:
+
+void compare (long long foo) {
+ if (foo < 4294967297LL)
+ abort();
+}
+
+to:
+
+compare:
+ subl $4, %esp
+ cmpl $0, 8(%esp)
+ setne %al
+ movzbw %al, %ax
+ cmpl $1, 12(%esp)
+ setg %cl
+ movzbw %cl, %cx
+ cmove %ax, %cx
+ testb $1, %cl
+ jne .LBB1_2 # UnifiedReturnBlock
+.LBB1_1: # ifthen
+ call abort
+.LBB1_2: # UnifiedReturnBlock
+ addl $4, %esp
+ ret
+
+(also really horrible code on ppc). This is due to the expand code for 64-bit
+compares. GCC produces multiple branches, which is much nicer:
+
+compare:
+ subl $12, %esp
+ movl 20(%esp), %edx
+ movl 16(%esp), %eax
+ decl %edx
+ jle .L7
+.L5:
+ addl $12, %esp
+ ret
+ .p2align 4,,7
+.L7:
+ jl .L4
+ cmpl $0, %eax
+ .p2align 4,,8
+ ja .L5
+.L4:
+ .p2align 4,,9
+ call abort
+
+//===---------------------------------------------------------------------===//
+
+Tail call optimization improvements: Tail call optimization currently
+pushes all arguments on the top of the stack (their normal place for
+non-tail-call-optimized calls) that source from the caller's arguments
+or from a virtual register (which may itself also source from the
+caller's arguments).
+This is done to prevent overwriting of parameters (see the example
+below) that might be used later.
+
+example:
+
+int callee(int32, int64);
+int caller(int32 arg1, int32 arg2) {
+ int64 local = arg2 * 2;
+ return callee(arg2, (int64)local);
+}
+
+[arg1] [!arg2 no longer valid since we moved local onto it]
+[arg2] -> [(int64)
+[RETADDR] local ]
+
+Moving arg1 onto the stack slot of callee function would overwrite
+arg2 of the caller.
+
+Possible optimizations:
+
+
+ - Analyse the actual parameters of the callee to see which would
+ overwrite a caller parameter which is used by the callee and only
+ push them onto the top of the stack.
+
+ int callee (int32 arg1, int32 arg2);
+ int caller (int32 arg1, int32 arg2) {
+ return callee(arg1,arg2);
+ }
+
+ Here we don't need to write any variables to the top of the stack
+ since they don't overwrite each other.
+
+ int callee (int32 arg1, int32 arg2);
+ int caller (int32 arg1, int32 arg2) {
+ return callee(arg2,arg1);
+ }
+
+ Here we need to push the arguments because they overwrite each
+ other.
+
+//===---------------------------------------------------------------------===//
+
+#include <stdlib.h>
+
+int main ()
+{
+ int i = 0;
+ unsigned long int z = 0;
+
+ do {
+ z -= 0x00004000;
+ i++;
+ if (i > 0x00040000)
+ abort ();
+ } while (z > 0);
+ exit (0);
+}
+
+gcc compiles this to:
+
+_main:
+ subl $28, %esp
+ xorl %eax, %eax
+ jmp L2
+L3:
+ cmpl $262144, %eax
+ je L10
+L2:
+ addl $1, %eax
+ cmpl $262145, %eax
+ jne L3
+ call L_abort$stub
+L10:
+ movl $0, (%esp)
+ call L_exit$stub
+
+llvm:
+
+_main:
+ subl $12, %esp
+ movl $1, %eax
+ movl $16384, %ecx
+LBB1_1: # bb
+ cmpl $262145, %eax
+ jge LBB1_4 # cond_true
+LBB1_2: # cond_next
+ incl %eax
+ addl $4294950912, %ecx
+ cmpl $16384, %ecx
+ jne LBB1_1 # bb
+LBB1_3: # bb11
+ xorl %eax, %eax
+ addl $12, %esp
+ ret
+LBB1_4: # cond_true
+ call L_abort$stub
+
+1. LSR should rewrite the first cmp with induction variable %ecx.
+2. DAG combiner should fold
+ leal 1(%eax), %edx
+ cmpl $262145, %edx
+ =>
+ cmpl $262144, %eax
+
+//===---------------------------------------------------------------------===//
+
+define i64 @test(double %X) {
+ %Y = fptosi double %X to i64
+ ret i64 %Y
+}
+
+compiles to:
+
+_test:
+ subl $20, %esp
+ movsd 24(%esp), %xmm0
+ movsd %xmm0, 8(%esp)
+ fldl 8(%esp)
+ fisttpll (%esp)
+ movl 4(%esp), %edx
+ movl (%esp), %eax
+ addl $20, %esp
+ #FP_REG_KILL
+ ret
+
+This should just fldl directly from the input stack slot.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+int foo (int x) { return (x & 65535) | 255; }
+
+Should compile into:
+
+_foo:
+ movzwl 4(%esp), %eax
+ orl $255, %eax
+ ret
+
+instead of:
+_foo:
+ movl $255, %eax
+ orl 4(%esp), %eax
+ andl $65535, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+We're codegen'ing multiply of long longs inefficiently:
+
+unsigned long long LLM(unsigned long long arg1, unsigned long long arg2) {
+ return arg1 * arg2;
+}
+
+We compile to (fomit-frame-pointer):
+
+_LLM:
+ pushl %esi
+ movl 8(%esp), %ecx
+ movl 16(%esp), %esi
+ movl %esi, %eax
+ mull %ecx
+ imull 12(%esp), %esi
+ addl %edx, %esi
+ imull 20(%esp), %ecx
+ movl %esi, %edx
+ addl %ecx, %edx
+ popl %esi
+ ret
+
+This looks like a scheduling deficiency and lack of remat of the load from
+the argument area. ICC apparently produces:
+
+ movl 8(%esp), %ecx
+ imull 12(%esp), %ecx
+ movl 16(%esp), %eax
+ imull 4(%esp), %eax
+ addl %eax, %ecx
+ movl 4(%esp), %eax
+ mull 12(%esp)
+ addl %ecx, %edx
+ ret
+
+Note that it remat'd loads from 4(esp) and 12(esp). See this GCC PR:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17236
+
+//===---------------------------------------------------------------------===//
+
+We can fold a store into "zeroing a reg". Instead of:
+
+xorl %eax, %eax
+movl %eax, 124(%esp)
+
+we should get:
+
+movl $0, 124(%esp)
+
+if the flags of the xor are dead.
+
+Likewise, we isel "x<<1" into "add reg,reg". If reg is spilled, this should
+be folded into: shl [mem], 1
+
+//===---------------------------------------------------------------------===//
+
+This testcase misses a read/modify/write opportunity (from PR1425):
+
+void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
+ int i;
+ for(i=0; i<width; i++)
+ b1[i] += (1*(b0[i] + b2[i])+0)>>0;
+}
+
+We compile it down to:
+
+LBB1_2: # bb
+ movl (%esi,%edi,4), %ebx
+ addl (%ecx,%edi,4), %ebx
+ addl (%edx,%edi,4), %ebx
+ movl %ebx, (%ecx,%edi,4)
+ incl %edi
+ cmpl %eax, %edi
+ jne LBB1_2 # bb
+
+the inner loop should add to the memory location (%ecx,%edi,4), saving
+a mov. Something like:
+
+ movl (%esi,%edi,4), %ebx
+ addl (%edx,%edi,4), %ebx
+ addl %ebx, (%ecx,%edi,4)
+
+Here is another interesting example:
+
+void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
+ int i;
+ for(i=0; i<width; i++)
+ b1[i] -= (1*(b0[i] + b2[i])+0)>>0;
+}
+
+We miss the r/m/w opportunity here by using 2 subs instead of an add+sub[mem]:
+
+LBB9_2: # bb
+ movl (%ecx,%edi,4), %ebx
+ subl (%esi,%edi,4), %ebx
+ subl (%edx,%edi,4), %ebx
+ movl %ebx, (%ecx,%edi,4)
+ incl %edi
+ cmpl %eax, %edi
+ jne LBB9_2 # bb
+
+Additionally, LSR should rewrite the exit condition of these loops to use
+a stride-4 IV, which would allow all the scales in the loop to go away.
+This would result in smaller code and more efficient microops.
+
+//===---------------------------------------------------------------------===//
+
+In SSE mode, we turn abs and neg into a load from the constant pool plus an
+andpd (for abs) or xorpd (for neg) instruction, for example:
+
+ xorpd LCPI1_0, %xmm2
+
+However, if xmm2 gets spilled, we end up with really ugly code like this:
+
+ movsd (%esp), %xmm0
+ xorpd LCPI1_0, %xmm0
+ movsd %xmm0, (%esp)
+
+Since we 'know' that this is a 'neg', we can actually "fold" the spill into
+the neg/abs instruction, turning it into an *integer* operation, like this:
+
+ xorl 2147483648, [mem+4] ## 2147483648 = (1 << 31)
+
+you could also use xorb, but xorl is less likely to lead to a partial register
+stall. Here is a contrived testcase:
+
+double a, b, c;
+void test(double *P) {
+ double X = *P;
+ a = X;
+ bar();
+ X = -X;
+ b = X;
+ bar();
+ c = X;
+}
+
+//===---------------------------------------------------------------------===//
+
+Handling llvm.memory.barrier on pre-SSE2 CPUs
+
+should generate:
+lock ; mov %esp, %esp
+
+//===---------------------------------------------------------------------===//
+
+The code generated on x86 for checking for signed overflow on a multiply, done
+the obvious way, is much longer than it needs to be.
+
+int x(int a, int b) {
+ long long prod = (long long)a*b;
+ return prod > 0x7FFFFFFF || prod < (-0x7FFFFFFF-1);
+}
+
+See PR2053 for more details.
+
+//===---------------------------------------------------------------------===//
+
+We should investigate using cdq/cltd (effect: edx = sar eax, 31)
+more aggressively; it should cost the same as a move+shift on any modern
+processor, but it's a lot shorter. Downside is that it puts more
+pressure on register allocation because it has fixed operands.
+
+Example:
+int abs(int x) {return x < 0 ? -x : x;}
+
+gcc compiles this to the following when using march/mtune=pentium2/3/4/m/etc.:
+abs:
+ movl 4(%esp), %eax
+ cltd
+ xorl %edx, %eax
+ subl %edx, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+Consider:
+int test(unsigned long a, unsigned long b) { return -(a < b); }
+
+We currently compile this to:
+
+define i32 @test(i32 %a, i32 %b) nounwind {
+ %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1]
+ %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ %tmp5 = sub i32 0, %tmp34 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
+and
+
+_test:
+ movl 8(%esp), %eax
+ cmpl %eax, 4(%esp)
+ setb %al
+ movzbl %al, %eax
+ negl %eax
+ ret
+
+Several deficiencies here. First, we should instcombine zext+neg into sext:
+
+define i32 @test2(i32 %a, i32 %b) nounwind {
+ %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1]
+ %tmp34 = sext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ ret i32 %tmp34
+}
+
+However, before we can do that, we have to fix the bad codegen that we get for
+sext from bool:
+
+_test2:
+ movl 8(%esp), %eax
+ cmpl %eax, 4(%esp)
+ setb %al
+ movzbl %al, %eax
+ shll $31, %eax
+ sarl $31, %eax
+ ret
+
+This code should be at least as good as the code above. Once this is fixed, we
+can optimize this specific case even more to:
+
+ movl 8(%esp), %eax
+ xorl %ecx, %ecx
+ cmpl %eax, 4(%esp)
+ sbbl %ecx, %ecx
+
+//===---------------------------------------------------------------------===//
+
+Take the following code (from
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16541):
+
+extern unsigned char first_one[65536];
+int FirstOnet(unsigned long long arg1)
+{
+ if (arg1 >> 48)
+ return (first_one[arg1 >> 48]);
+ return 0;
+}
+
+
+The following code is currently generated:
+FirstOnet:
+ movl 8(%esp), %eax
+ cmpl $65536, %eax
+ movl 4(%esp), %ecx
+ jb .LBB1_2 # UnifiedReturnBlock
+.LBB1_1: # ifthen
+ shrl $16, %eax
+ movzbl first_one(%eax), %eax
+ ret
+.LBB1_2: # UnifiedReturnBlock
+ xorl %eax, %eax
+ ret
+
+There are a few possible improvements here:
+1. We should be able to eliminate the dead load into %ecx
+2. We could change the "movl 8(%esp), %eax" into
+ "movzwl 10(%esp), %eax"; this lets us change the cmpl
+ into a testl, which is shorter, and eliminate the shift.
+
+We could also in theory eliminate the branch by using a conditional
+for the address of the load, but that seems unlikely to be worthwhile
+in general.
+
+//===---------------------------------------------------------------------===//
+
+We compile this function:
+
+define i32 @foo(i32 %a, i32 %b, i32 %c, i8 zeroext %d) nounwind {
+entry:
+ %tmp2 = icmp eq i8 %d, 0 ; <i1> [#uses=1]
+ br i1 %tmp2, label %bb7, label %bb
+
+bb: ; preds = %entry
+ %tmp6 = add i32 %b, %a ; <i32> [#uses=1]
+ ret i32 %tmp6
+
+bb7: ; preds = %entry
+ %tmp10 = sub i32 %a, %c ; <i32> [#uses=1]
+ ret i32 %tmp10
+}
+
+to:
+
+_foo:
+ cmpb $0, 16(%esp)
+ movl 12(%esp), %ecx
+ movl 8(%esp), %eax
+ movl 4(%esp), %edx
+ je LBB1_2 # bb7
+LBB1_1: # bb
+ addl %edx, %eax
+ ret
+LBB1_2: # bb7
+ movl %edx, %eax
+ subl %ecx, %eax
+ ret
+
+The coalescer could coalesce "edx" with "eax" to avoid the movl in LBB1_2
+if it commuted the addl in LBB1_1.
+
+//===---------------------------------------------------------------------===//
+
+See rdar://4653682.
+
+From flops:
+
+LBB1_15: # bb310
+ cvtss2sd LCPI1_0, %xmm1
+ addsd %xmm1, %xmm0
+ movsd 176(%esp), %xmm2
+ mulsd %xmm0, %xmm2
+ movapd %xmm2, %xmm3
+ mulsd %xmm3, %xmm3
+ movapd %xmm3, %xmm4
+ mulsd LCPI1_23, %xmm4
+ addsd LCPI1_24, %xmm4
+ mulsd %xmm3, %xmm4
+ addsd LCPI1_25, %xmm4
+ mulsd %xmm3, %xmm4
+ addsd LCPI1_26, %xmm4
+ mulsd %xmm3, %xmm4
+ addsd LCPI1_27, %xmm4
+ mulsd %xmm3, %xmm4
+ addsd LCPI1_28, %xmm4
+ mulsd %xmm3, %xmm4
+ addsd %xmm1, %xmm4
+ mulsd %xmm2, %xmm4
+ movsd 152(%esp), %xmm1
+ addsd %xmm4, %xmm1
+ movsd %xmm1, 152(%esp)
+ incl %eax
+ cmpl %eax, %esi
+ jge LBB1_15 # bb310
+LBB1_16: # bb358.loopexit
+ movsd 152(%esp), %xmm0
+ addsd %xmm0, %xmm0
+ addsd LCPI1_22, %xmm0
+ movsd %xmm0, 152(%esp)
+
+Rather than spilling the result of the last addsd in the loop, we should
+insert a copy to split the interval (one for the duration of the loop, one
+extending to the fall through). The register pressure in the loop isn't high
+enough to warrant the spill.
+
+Also check why xmm7 is not used at all in the function.
+
+//===---------------------------------------------------------------------===//
+
+Legalize loses track of the fact that bools are always zero extended when in
+memory. This causes us to compile abort_gzip (from 164.gzip) from:
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+@in_exit.4870.b = internal global i1 false ; <i1*> [#uses=2]
+define fastcc void @abort_gzip() noreturn nounwind {
+entry:
+ %tmp.b.i = load i1* @in_exit.4870.b ; <i1> [#uses=1]
+ br i1 %tmp.b.i, label %bb.i, label %bb4.i
+bb.i: ; preds = %entry
+ tail call void @exit( i32 1 ) noreturn nounwind
+ unreachable
+bb4.i: ; preds = %entry
+ store i1 true, i1* @in_exit.4870.b
+ tail call void @exit( i32 1 ) noreturn nounwind
+ unreachable
+}
+declare void @exit(i32) noreturn nounwind
+
+into:
+
+_abort_gzip:
+ subl $12, %esp
+ movb _in_exit.4870.b, %al
+ notb %al
+ testb $1, %al
+ jne LBB1_2 ## bb4.i
+LBB1_1: ## bb.i
+ ...
+
+//===---------------------------------------------------------------------===//
+
+We compile:
+
+int test(int x, int y) {
+ return x-y-1;
+}
+
+into (-m64):
+
+_test:
+ decl %edi
+ movl %edi, %eax
+ subl %esi, %eax
+ ret
+
+it would be better to codegen as: x+~y (notl+addl)
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+int foo(const char *str,...)
+{
+ __builtin_va_list a; int x;
+ __builtin_va_start(a,str); x = __builtin_va_arg(a,int); __builtin_va_end(a);
+ return x;
+}
+
+gets compiled into this on x86-64:
+ subq $200, %rsp
+ movaps %xmm7, 160(%rsp)
+ movaps %xmm6, 144(%rsp)
+ movaps %xmm5, 128(%rsp)
+ movaps %xmm4, 112(%rsp)
+ movaps %xmm3, 96(%rsp)
+ movaps %xmm2, 80(%rsp)
+ movaps %xmm1, 64(%rsp)
+ movaps %xmm0, 48(%rsp)
+ movq %r9, 40(%rsp)
+ movq %r8, 32(%rsp)
+ movq %rcx, 24(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 8(%rsp)
+ leaq (%rsp), %rax
+ movq %rax, 192(%rsp)
+ leaq 208(%rsp), %rax
+ movq %rax, 184(%rsp)
+ movl $48, 180(%rsp)
+ movl $8, 176(%rsp)
+ movl 176(%rsp), %eax
+ cmpl $47, %eax
+ jbe .LBB1_3 # bb
+.LBB1_1: # bb3
+ movq 184(%rsp), %rcx
+ leaq 8(%rcx), %rax
+ movq %rax, 184(%rsp)
+.LBB1_2: # bb4
+ movl (%rcx), %eax
+ addq $200, %rsp
+ ret
+.LBB1_3: # bb
+ movl %eax, %ecx
+ addl $8, %eax
+ addq 192(%rsp), %rcx
+ movl %eax, 176(%rsp)
+ jmp .LBB1_2 # bb4
+
+gcc 4.3 generates:
+ subq $96, %rsp
+.LCFI0:
+ leaq 104(%rsp), %rax
+ movq %rsi, -80(%rsp)
+ movl $8, -120(%rsp)
+ movq %rax, -112(%rsp)
+ leaq -88(%rsp), %rax
+ movq %rax, -104(%rsp)
+ movl $8, %eax
+ cmpl $48, %eax
+ jb .L6
+ movq -112(%rsp), %rdx
+ movl (%rdx), %eax
+ addq $96, %rsp
+ ret
+ .p2align 4,,10
+ .p2align 3
+.L6:
+ mov %eax, %edx
+ addq -104(%rsp), %rdx
+ addl $8, %eax
+ movl %eax, -120(%rsp)
+ movl (%rdx), %eax
+ addq $96, %rsp
+ ret
+
+and it gets compiled into this on x86:
+ pushl %ebp
+ movl %esp, %ebp
+ subl $4, %esp
+ leal 12(%ebp), %eax
+ movl %eax, -4(%ebp)
+ leal 16(%ebp), %eax
+ movl %eax, -4(%ebp)
+ movl 12(%ebp), %eax
+ addl $4, %esp
+ popl %ebp
+ ret
+
+gcc 4.3 generates:
+ pushl %ebp
+ movl %esp, %ebp
+ movl 12(%ebp), %eax
+ popl %ebp
+ ret
+
+//===---------------------------------------------------------------------===//
+
+Teach tblgen not to check bitconvert source type in some cases. This allows us
+to consolidate the following patterns in X86InstrMMX.td:
+
+def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
+
+There are other cases in various td files.
+
+//===---------------------------------------------------------------------===//
+
+Take something like the following on x86-32:
+unsigned a(unsigned long long x, unsigned y) {return x % y;}
+
+We currently generate a libcall, but we really shouldn't: the expansion is
+shorter and likely faster than the libcall. The expected code is something
+like the following:
+
+ movl 12(%ebp), %eax
+ movl 16(%ebp), %ecx
+ xorl %edx, %edx
+ divl %ecx
+ movl 8(%ebp), %eax
+ divl %ecx
+ movl %edx, %eax
+ ret
+
+A similar code sequence works for division.
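+
+i.e. the division counterpart of the testcase above, which would be something
+like:
+
+unsigned div32(unsigned long long x, unsigned y) { return x / y; }
+
+using the same two-divide expansion, but keeping the quotient from the second
+divl (in %eax) instead of the remainder (in %edx).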
+
+//===---------------------------------------------------------------------===//
+
+These should compile to the same code, but the latter codegens to useless
+instructions on X86. This may be a trivial dag combine (GCC PR7061):
+
+struct s1 { unsigned char a, b; };
+unsigned long f1(struct s1 x) {
+ return x.a + x.b;
+}
+struct s2 { unsigned a: 8, b: 8; };
+unsigned long f2(struct s2 x) {
+ return x.a + x.b;
+}
+
+//===---------------------------------------------------------------------===//
+
+We currently compile this:
+
+define i32 @func1(i32 %v1, i32 %v2) nounwind {
+entry:
+ %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+ %sum = extractvalue {i32, i1} %t, 0
+ %obit = extractvalue {i32, i1} %t, 1
+ br i1 %obit, label %overflow, label %normal
+normal:
+ ret i32 %sum
+overflow:
+ call void @llvm.trap()
+ unreachable
+}
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
+declare void @llvm.trap()
+
+to:
+
+_func1:
+ movl 4(%esp), %eax
+ addl 8(%esp), %eax
+ jo LBB1_2 ## overflow
+LBB1_1: ## normal
+ ret
+LBB1_2: ## overflow
+ ud2
+
+It would be nice to produce "into" someday.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+void vec_mpys1(int y[], const int x[], int scaler) {
+  int i;
+  for (i = 0; i < 150; i++)
+    y[i] += (((long long)scaler * (long long)x[i]) >> 31);
+}
+
+Compiles to this loop with GCC 3.x:
+
+.L5:
+ movl %ebx, %eax
+ imull (%edi,%ecx,4)
+ shrdl $31, %edx, %eax
+ addl %eax, (%esi,%ecx,4)
+ incl %ecx
+ cmpl $149, %ecx
+ jle .L5
+
+llvm-gcc compiles it to the much uglier:
+
+LBB1_1: ## bb1
+ movl 24(%esp), %eax
+ movl (%eax,%edi,4), %ebx
+ movl %ebx, %ebp
+ imull %esi, %ebp
+ movl %ebx, %eax
+ mull %ecx
+ addl %ebp, %edx
+ sarl $31, %ebx
+ imull %ecx, %ebx
+ addl %edx, %ebx
+ shldl $1, %eax, %ebx
+ movl 20(%esp), %eax
+ addl %ebx, (%eax,%edi,4)
+ incl %edi
+ cmpl $150, %edi
+ jne LBB1_1 ## bb1
+
+//===---------------------------------------------------------------------===//
+
+Test instructions can be eliminated by using EFLAGS values from arithmetic
+instructions. This is currently not done for mul, and, or, xor, neg, shl,
+sra, srl, shld, shrd, atomic ops, and others. It is also currently not done
+for read-modify-write instructions. It is also currently not done if the
+OF or CF flags are needed.
+
+The shift operators have the complication that when the shift count is
+zero, EFLAGS is not set, so they can only subsume a test instruction if
+the shift count is known to be non-zero. Also, using the EFLAGS value
+from a shift is apparently very slow on some x86 implementations.
+
+In read-modify-write instructions, the root node in the isel match is
+the store, and isel has no way for the use of the EFLAGS result of the
+arithmetic to be remapped to the new node.
+
+Add and subtract instructions set OF on signed overflow and CF on unsigned
+overflow, while test instructions always clear OF and CF. In order to
+replace a test with an add or subtract in a situation where OF or CF is
+needed, codegen must be able to prove that the operation cannot see
+signed or unsigned overflow, respectively.
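+
+A small example of the opportunity for the logical ops (only ZF is needed from
+the and, so a separate test is redundant):
+
+int uses_and_flags(int a, int b) {
+  return (a & b) ? 1 : 2;         /* the andl already sets ZF */
+}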
+
+//===---------------------------------------------------------------------===//
+
+memcpy/memmove do not lower to SSE copies when possible. A silly example is:
+define <16 x float> @foo(<16 x float> %A) nounwind {
+ %tmp = alloca <16 x float>, align 16
+ %tmp2 = alloca <16 x float>, align 16
+ store <16 x float> %A, <16 x float>* %tmp
+ %s = bitcast <16 x float>* %tmp to i8*
+ %s2 = bitcast <16 x float>* %tmp2 to i8*
+ call void @llvm.memcpy.i64(i8* %s, i8* %s2, i64 64, i32 16)
+ %R = load <16 x float>* %tmp2
+ ret <16 x float> %R
+}
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+
+which compiles to:
+
+_foo:
+ subl $140, %esp
+ movaps %xmm3, 112(%esp)
+ movaps %xmm2, 96(%esp)
+ movaps %xmm1, 80(%esp)
+ movaps %xmm0, 64(%esp)
+ movl 60(%esp), %eax
+ movl %eax, 124(%esp)
+ movl 56(%esp), %eax
+ movl %eax, 120(%esp)
+ movl 52(%esp), %eax
+ <many many more 32-bit copies>
+ movaps (%esp), %xmm0
+ movaps 16(%esp), %xmm1
+ movaps 32(%esp), %xmm2
+ movaps 48(%esp), %xmm3
+ addl $140, %esp
+ ret
+
+On Nehalem, it may even be cheaper to just use movups when unaligned than to
+fall back to lower-granularity chunks.
+
+//===---------------------------------------------------------------------===//
+
+Implement processor-specific optimizations for parity with GCC on these
+processors. GCC does two optimizations:
+
+1. ix86_pad_returns inserts a noop before ret instructions if immediately
+   preceded by a conditional branch or is the target of a jump.
+2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of
+ code contains more than 3 branches.
+
+The first one is done for all AMDs, Core2, and "Generic".
+The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona,
+  Core 2, and "Generic".
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
new file mode 100644
index 000000000000..fd13b02e1367
--- /dev/null
+++ b/lib/Target/X86/X86.h
@@ -0,0 +1,84 @@
+//===-- X86.h - Top-level interface for X86 representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the x86
+// target library, as used by the LLVM JIT.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_X86_H
+#define TARGET_X86_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class X86TargetMachine;
+class FunctionPass;
+class MachineCodeEmitter;
+class JITCodeEmitter;
+class raw_ostream;
+
+/// createX86ISelDag - This pass converts a legalized DAG into a
+/// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *createX86ISelDag(X86TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+/// createX86FloatingPointStackifierPass - This function returns a pass which
+/// converts floating point register references and pseudo instructions into
+/// floating point stack references and physical instructions.
+///
+FunctionPass *createX86FloatingPointStackifierPass();
+
+/// createX87FPRegKillInserterPass - This function returns a pass which
+/// inserts FP_REG_KILL instructions where needed.
+///
+FunctionPass *createX87FPRegKillInserterPass();
+
+/// createX86CodePrinterPass - Returns a pass that prints the X86
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description.
+///
+FunctionPass *createX86CodePrinterPass(raw_ostream &o,
+ X86TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+
+/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
+/// to the specified MCE object.
+
+FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM,
+ MachineCodeEmitter &MCE);
+FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE);
+
+/// createEmitX86CodeToMemory - Returns a pass that converts a register
+/// allocated function into raw machine code in a dynamically
+/// allocated chunk of memory.
+///
+FunctionPass *createEmitX86CodeToMemory();
+
+/// createX86MaxStackAlignmentCalculatorPass - This function returns a pass
+/// which calculates maximal stack alignment required for function
+///
+FunctionPass *createX86MaxStackAlignmentCalculatorPass();
+
+} // End llvm namespace
+
+// Defines symbolic names for X86 registers. This defines a mapping from
+// register name to register number.
+//
+#include "X86GenRegisterNames.inc"
+
+// Defines symbolic names for the X86 instructions.
+//
+#include "X86GenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
new file mode 100644
index 000000000000..8df138d8d7a7
--- /dev/null
+++ b/lib/Target/X86/X86.td
@@ -0,0 +1,184 @@
+//===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a target description file for the Intel i386 architecture, referred to
+// here as the "X86" architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing...
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
+ "Enable MMX instructions">;
+def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
+ "Enable SSE instructions",
+ [FeatureMMX]>;
+def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
+ "Enable SSE2 instructions",
+ [FeatureSSE1]>;
+def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
+ "Enable SSE3 instructions",
+ [FeatureSSE2]>;
+def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
+ "Enable SSSE3 instructions",
+ [FeatureSSE3]>;
+def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
+ "Enable SSE 4.1 instructions",
+ [FeatureSSSE3]>;
+def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
+ "Enable SSE 4.2 instructions",
+ [FeatureSSE41]>;
+def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
+ "Enable 3DNow! instructions">;
+def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
+ "Enable 3DNow! Athlon instructions",
+ [Feature3DNow]>;
+// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
+// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
+// without disabling 64-bit mode.
+def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
+ "Support 64-bit instructions">;
+def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
+ "Bit testing of memory is slow">;
+def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
+ "Support SSE 4a instructions">;
+
+//===----------------------------------------------------------------------===//
+// X86 processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"i386", []>;
+def : Proc<"i486", []>;
+def : Proc<"i586", []>;
+def : Proc<"pentium", []>;
+def : Proc<"pentium-mmx", [FeatureMMX]>;
+def : Proc<"i686", []>;
+def : Proc<"pentiumpro", []>;
+def : Proc<"pentium2", [FeatureMMX]>;
+def : Proc<"pentium3", [FeatureSSE1]>;
+def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
+def : Proc<"pentium4", [FeatureSSE2]>;
+def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
+
+def : Proc<"k6", [FeatureMMX]>;
+def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
+def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
+def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k8", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"opteron", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, Feature64Bit,
+ FeatureSlowBTMem]>;
+def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
+ Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A,
+ Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+
+def : Proc<"winchip-c6", [FeatureMMX]>;
+def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
+def : Proc<"c3", [FeatureMMX, Feature3DNow]>;
+def : Proc<"c3-2", [FeatureSSE1]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "X86RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "X86InstrInfo.td"
+
+def X86InstrInfo : InstrInfo {
+
+ // Define how we want to layout our TargetSpecific information field... This
+ // should be kept up-to-date with the fields in the X86InstrInfo.h file.
+ let TSFlagsFields = ["FormBits",
+ "hasOpSizePrefix",
+ "hasAdSizePrefix",
+ "Prefix",
+ "hasREX_WPrefix",
+ "ImmTypeBits",
+ "FPFormBits",
+ "hasLockPrefix",
+ "SegOvrBits",
+ "Opcode"];
+ let TSFlagsShifts = [0,
+ 6,
+ 7,
+ 8,
+ 12,
+ 13,
+ 16,
+ 19,
+ 20,
+ 24];
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "X86CallingConv.td"
+
+
+//===----------------------------------------------------------------------===//
+// Assembly Printers
+//===----------------------------------------------------------------------===//
+
+// The X86 target supports two different syntaxes for emitting machine code.
+// This is controlled by the -x86-asm-syntax={att|intel} flag.
+def ATTAsmWriter : AsmWriter {
+ string AsmWriterClassName = "ATTAsmPrinter";
+ int Variant = 0;
+}
+def IntelAsmWriter : AsmWriter {
+ string AsmWriterClassName = "IntelAsmPrinter";
+ int Variant = 1;
+}
+
+
+def X86 : Target {
+ // Information about the instructions...
+ let InstructionSet = X86InstrInfo;
+
+ let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
+}
diff --git a/lib/Target/X86/X86COFF.h b/lib/Target/X86/X86COFF.h
new file mode 100644
index 000000000000..0a8e4e6ac6db
--- /dev/null
+++ b/lib/Target/X86/X86COFF.h
@@ -0,0 +1,95 @@
+//===--- X86COFF.h - Some definitions from COFF documentations ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file just defines some symbols found in COFF documentation. They are
+// used to emit function type information for COFF targets (Cygwin/Mingw32).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86COFF_H
+#define X86COFF_H
+
+namespace COFF
+{
+/// Storage class tells where and what the symbol represents
+enum StorageClass {
+ C_EFCN = -1, ///< Physical end of function
+ C_NULL = 0, ///< No symbol
+ C_AUTO = 1, ///< Automatic variable
+ C_EXT = 2, ///< External symbol
+ C_STAT = 3, ///< Static
+ C_REG = 4, ///< Register variable
+ C_EXTDEF = 5, ///< External definition
+ C_LABEL = 6, ///< Label
+ C_ULABEL = 7, ///< Undefined label
+ C_MOS = 8, ///< Member of structure
+ C_ARG = 9, ///< Function argument
+ C_STRTAG = 10, ///< Structure tag
+ C_MOU = 11, ///< Member of union
+ C_UNTAG = 12, ///< Union tag
+ C_TPDEF = 13, ///< Type definition
+ C_USTATIC = 14, ///< Undefined static
+ C_ENTAG = 15, ///< Enumeration tag
+ C_MOE = 16, ///< Member of enumeration
+ C_REGPARM = 17, ///< Register parameter
+ C_FIELD = 18, ///< Bit field
+
+ C_BLOCK = 100, ///< ".bb" or ".eb" - beginning or end of block
+ C_FCN = 101, ///< ".bf" or ".ef" - beginning or end of function
+ C_EOS = 102, ///< End of structure
+ C_FILE = 103, ///< File name
+ C_LINE = 104, ///< Line number, reformatted as symbol
+ C_ALIAS = 105, ///< Duplicate tag
+ C_HIDDEN = 106 ///< External symbol in dmert public lib
+};
+
+/// The type of the symbol. This is made up of a base type and a derived type.
+/// For example, pointer to int is "pointer to T" and "int"
+enum SymbolType {
+ T_NULL = 0, ///< No type info
+ T_ARG = 1, ///< Void function argument (only used by compiler)
+ T_VOID = 1, ///< The same as above. Just named differently in some specs.
+ T_CHAR = 2, ///< Character
+ T_SHORT = 3, ///< Short integer
+ T_INT = 4, ///< Integer
+ T_LONG = 5, ///< Long integer
+ T_FLOAT = 6, ///< Floating point
+ T_DOUBLE = 7, ///< Double word
+ T_STRUCT = 8, ///< Structure
+ T_UNION = 9, ///< Union
+ T_ENUM = 10, ///< Enumeration
+ T_MOE = 11, ///< Member of enumeration
+ T_UCHAR = 12, ///< Unsigned character
+ T_USHORT = 13, ///< Unsigned short
+ T_UINT = 14, ///< Unsigned integer
+ T_ULONG = 15 ///< Unsigned long
+};
+
+/// Derived type of symbol
+enum SymbolDerivedType {
+ DT_NON = 0, ///< No derived type
+ DT_PTR = 1, ///< Pointer to T
+ DT_FCN = 2, ///< Function returning T
+ DT_ARY = 3 ///< Array of T
+};
+
+/// Masks for extracting parts of type
+enum SymbolTypeMasks {
+ N_BTMASK = 017, ///< Mask for base type
+ N_TMASK = 060 ///< Mask for derived type
+};
+
+/// Offsets of parts of type
+enum Shifts {
+ N_BTSHFT = 4 ///< Type is formed as (base + derived << N_BTSHIFT)
+};
+
+}
+
+#endif // X86COFF_H
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
new file mode 100644
index 000000000000..7f99203a8333
--- /dev/null
+++ b/lib/Target/X86/X86CallingConv.td
@@ -0,0 +1,360 @@
+//===- X86CallingConv.td - Calling Conventions X86 32/64 ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the X86-32 and X86-64
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<X86Subtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Return-value conventions common to all X86 CC's.
+def RetCC_X86Common : CallingConv<[
+ // Scalar values are returned in AX first, then DX. For i8, the ABI
+ // requires the values to be in AL and AH, however this code uses AL and DL
+ // instead. This is because using AH for the second register conflicts with
+ // the way LLVM does multiple return values -- a return of {i16,i8} would end
+ // up in AX and AH, which overlap. Front-ends wishing to conform to the ABI
+ // for functions that return two i8 values are currently expected to pack the
+ // values into an i16 (which uses AX, and thus AL:AH).
+ CCIfType<[i8] , CCAssignToReg<[AL, DL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>,
+ CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>,
+
+ // Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
+ // can only be used by ABI non-compliant code. If the target doesn't have XMM
+ // registers, it won't have vector types.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+ // MMX vector types are always returned in MM0. If the target doesn't have
+ // MM0, it doesn't support these vector types.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToReg<[MM0]>>,
+
+ // Long double types are always returned in ST0 (even with SSE).
+ CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
+]>;
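+
+// Illustration of the i8-pair packing described above (hypothetical front-end
+// IR, not anything this file emits): instead of "ret {i8, i8}", a conforming
+// front-end would produce something like
+//   %lo     = zext i8 %a to i16
+//   %hi     = zext i8 %b to i16
+//   %hishl  = shl i16 %hi, 8
+//   %packed = or i16 %lo, %hishl
+//   ret i16 %packed
+// so the pair is returned in AX, i.e. AL:AH, as the native ABI expects.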
+
+// X86-32 C return-value convention.
+def RetCC_X86_32_C : CallingConv<[
+ // The X86-32 calling convention returns FP values in ST0, unless marked
+ // with "inreg" (used here to distinguish one kind of reg from another,
+ // weirdly; this is really the sse-regparm calling convention) in which
+ // case they use XMM0, otherwise it is the same as the common X86 calling
+ // conv.
+ CCIfInReg<CCIfSubtarget<"hasSSE2()",
+ CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+ CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-32 FastCC return-value convention.
+def RetCC_X86_32_Fast : CallingConv<[
+ // The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has
+ // SSE2, otherwise it is the same as the C calling convention.
+ // This can happen when a float, 2 x float, or 3 x float vector is split by
+ // target lowering, and is returned in 1-3 sse regs.
+ CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-64 C return-value convention.
+def RetCC_X86_64_C : CallingConv<[
+ // The X86-64 calling convention always returns FP values in XMM0 (with XMM1
+ // used for a second FP result).
+ CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
+ CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
+
+ // MMX vector types are always returned in XMM0 except for v1i64 which is
+ // returned in RAX. This disagrees with ABI documentation but is bug
+ // compatible with gcc.
+ CCIfType<[v1i64], CCAssignToReg<[RAX]>>,
+ CCIfType<[v8i8, v4i16, v2i32, v2f32], CCAssignToReg<[XMM0, XMM1]>>,
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
+// X86-Win64 C return-value convention.
+def RetCC_X86_Win64_C : CallingConv<[
+ // The X86-Win64 calling convention always returns __m64 values in RAX.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[RAX]>>,
+
+ // And FP in XMM0 only.
+ CCIfType<[f32], CCAssignToReg<[XMM0]>>,
+ CCIfType<[f64], CCAssignToReg<[XMM0]>>,
+
+ // Otherwise, everything is the same as 'normal' X86-64 C CC.
+ CCDelegateTo<RetCC_X86_64_C>
+]>;
+
+
+// This is the root return-value convention for the X86-32 backend.
+def RetCC_X86_32 : CallingConv<[
+ // If FastCC, use RetCC_X86_32_Fast.
+ CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+ // Otherwise, use RetCC_X86_32_C.
+ CCDelegateTo<RetCC_X86_32_C>
+]>;
+
+// This is the root return-value convention for the X86-64 backend.
+def RetCC_X86_64 : CallingConv<[
+ // Mingw64 and native Win64 use Win64 CC
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
+
+ // Otherwise, drop to normal X86-64 CC
+ CCDelegateTo<RetCC_X86_64_C>
+]>;
+
+// This is the return-value convention used for the entire X86 backend.
+def RetCC_X86 : CallingConv<[
+ CCIfSubtarget<"is64Bit()", CCDelegateTo<RetCC_X86_64>>,
+ CCDelegateTo<RetCC_X86_32>
+]>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+def CC_X86_64_C : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // The first 6 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
+ CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
+
+ // The first 8 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasSSE1()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
+
+ // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
+ // registers on Darwin.
+ CCIfType<[v8i8, v4i16, v2i32, v2f32],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCIfSubtarget<"hasSSE2()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>>,
+
+ // The first 8 v1i64 vector arguments are passed in GPRs on Darwin.
+ CCIfType<[v1i64],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Long doubles get stack slots whose size and alignment depend on the
+ // subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToStack<8, 8>>
+]>;
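+
+// For illustration (hypothetical prototype, not part of the convention): a
+// call f(i32 %a, double %b, i8* %c) assigns %a -> EDI, %b -> XMM0 and
+// %c -> RSI; integer and XMM registers are consumed independently here,
+// unlike the Win64 convention below, which shadows them pairwise.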
+
+// Calling convention used on Win64
+def CC_X86_Win64_C : CallingConv<[
+ // FIXME: Handle byval stuff.
+ // FIXME: Handle varargs.
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // The first 4 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+
+ // The first 4 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
+ [RCX , RDX , R8 , R9 ]>>,
+
+ // The first 4 MMX vector arguments are passed in GPRs.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32],
+ CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
+ [XMM0, XMM1, XMM2, XMM3]>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 16-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 16>>,
+
+ // Long doubles get stack slots whose size and alignment depend on the
+ // subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // __m64 vectors get 8-byte stack slots that are 16-byte aligned.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>>
+]>;
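+
+// For illustration (hypothetical prototype, not part of the convention): with
+// the shadowing above, f(i32 %a, double %b, i32 %c, double %d) assigns
+// %a -> ECX, %b -> XMM1, %c -> R8D and %d -> XMM3; each argument consumes its
+// positional slot in both register files, and a fifth argument would go to an
+// 8-byte stack slot.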
+
+// Tail call convention (fast): one register is reserved for the target
+// address, namely R9.
+def CC_X86_64_TailCall : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in R10.
+ CCIfNest<CCAssignToReg<[R10]>>,
+
+ // Integer arguments are passed in the first 5 of the usual argument
+ // registers; R9 is reserved for the tail call target address.
+ CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>,
+ CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>,
+
+ // The first 8 FP/Vector arguments are passed in XMM registers.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasSSE1()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
+
+ // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
+ // registers on Darwin.
+ CCIfType<[v8i8, v4i16, v2i32, v2f32],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
+
+ // The first 8 v1i64 vector arguments are passed in GPRs on Darwin.
+ CCIfType<[v1i64],
+ CCIfSubtarget<"isTargetDarwin()",
+ CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
+
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// X86 C Calling Convention
+//===----------------------------------------------------------------------===//
+
+/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
+/// values are spilled on the stack, and the first 4 vector values go in XMM
+/// regs.
+def CC_X86_32_Common : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // The first 3 float or double arguments, if marked 'inreg' and if the call
+ // is not a vararg call and if SSE2 is available, are passed in SSE registers.
+ CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
+ CCIfSubtarget<"hasSSE2()",
+ CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
+
+ // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
+ // registers if the call is not a vararg call.
+ CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32, v2f32],
+ CCAssignToReg<[MM0, MM1, MM2]>>>,
+
+ // Integer/Float values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // Doubles get 8-byte slots that are 4-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 4>>,
+
+ // Long doubles get slots whose size depends on the subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 4>>,
+
+ // The first 4 SSE vector arguments are passed in XMM registers.
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
+
+ // Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+ // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
+ // passed in the parameter area.
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>;
+
+def CC_X86_32_C : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in ECX.
+ CCIfNest<CCAssignToReg<[ECX]>>,
+
+ // The first 3 integer arguments, if marked 'inreg' and if the call is not
+ // a vararg call, are passed in integer registers.
+ CCIfNotVarArg<CCIfInReg<CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
+def CC_X86_32_FastCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first 2 integer arguments are passed in ECX/EDX
+ CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
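+
+// For illustration (hypothetical prototype, not part of the convention): a
+// fastcall function f(i32 %a, i32 %b, i32 %c) receives %a in ECX, %b in EDX,
+// and %c in a 4-byte stack slot via CC_X86_32_Common.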
+
+def CC_X86_32_FastCC : CallingConv<[
+ // Handles byval parameters. Note that we can't rely on the delegation
+ // to CC_X86_32_Common for this because that happens after code that
+ // puts arguments in registers.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first 2 integer arguments are passed in ECX/EDX
+ CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>,
+
+ // The first 3 float or double arguments, if the call is not a vararg
+ // call and if SSE2 is available, are passed in SSE registers.
+ CCIfNotVarArg<CCIfType<[f32,f64],
+ CCIfSubtarget<"hasSSE2()",
+ CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+
+ // Doubles get 8-byte slots that are 8-byte aligned.
+ CCIfType<[f64], CCAssignToStack<8, 8>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
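+
+// For illustration (hypothetical prototype, not part of the convention): a
+// non-vararg fastcc call f(i32 %a, i32 %b, double %c) on a subtarget with
+// SSE2 assigns %a -> ECX, %b -> EDX and %c -> XMM0; without SSE2 the double
+// instead falls through to the 8-byte, 8-byte-aligned stack slot above.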
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
new file mode 100644
index 000000000000..e988a5ca9d04
--- /dev/null
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -0,0 +1,811 @@
+//===-- X86/X86CodeEmitter.cpp - Convert X86 code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the X86 machine instructions into
+// relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-emitter"
+#include "X86InstrInfo.h"
+#include "X86JITInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "X86Relocations.h"
+#include "X86.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+template<class CodeEmitter>
+ class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass {
+ const X86InstrInfo *II;
+ const TargetData *TD;
+ X86TargetMachine &TM;
+ CodeEmitter &MCE;
+ intptr_t PICBaseOffset;
+ bool Is64BitMode;
+ bool IsPIC;
+ public:
+ static char ID;
+ explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
+ : MachineFunctionPass(&ID), II(0), TD(0), TM(tm),
+ MCE(mce), PICBaseOffset(0), Is64BitMode(false),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+ Emitter(X86TargetMachine &tm, CodeEmitter &mce,
+ const X86InstrInfo &ii, const TargetData &td, bool is64)
+ : MachineFunctionPass(&ID), II(&ii), TD(&td), TM(tm),
+ MCE(mce), PICBaseOffset(0), Is64BitMode(is64),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "X86 Machine Code Emitter";
+ }
+
+ void emitInstruction(const MachineInstr &MI,
+ const TargetInstrDesc *Desc);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
+ void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
+ intptr_t Disp = 0, intptr_t PCAdj = 0,
+ bool NeedStub = false, bool Indirect = false);
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
+ intptr_t PCAdj = 0);
+ void emitJumpTableAddress(unsigned JTI, unsigned Reloc,
+ intptr_t PCAdj = 0);
+
+ void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
+ intptr_t PCAdj = 0);
+
+ void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
+ void emitRegModRMByte(unsigned RegOpcodeField);
+ void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
+ void emitConstant(uint64_t Val, unsigned Size);
+
+ void emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op, unsigned RegOpcodeField,
+ intptr_t PCAdj = 0);
+
+ unsigned getX86RegNum(unsigned RegNo) const;
+
+ bool gvNeedsNonLazyPtr(const GlobalValue *GV);
+ };
+
+template<class CodeEmitter>
+ char Emitter<CodeEmitter>::ID = 0;
+}
+
+/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
+/// to the specified templated MachineCodeEmitter object.
+
+namespace llvm {
+
+FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM,
+ MachineCodeEmitter &MCE) {
+ return new Emitter<MachineCodeEmitter>(TM, MCE);
+}
+FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new Emitter<JITCodeEmitter>(TM, JCE);
+}
+
+} // end namespace llvm
+
+template<class CodeEmitter>
+bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
+
+ MCE.setModuleInfo(&getAnalysis<MachineModuleInfo>());
+
+ II = TM.getInstrInfo();
+ TD = TM.getTargetData();
+ Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit();
+ IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+
+ do {
+ DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n";
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB) {
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const TargetInstrDesc &Desc = I->getDesc();
+ emitInstruction(*I, &Desc);
+ // MOVPC32r is basically a call plus a pop instruction.
+ if (Desc.getOpcode() == X86::MOVPC32r)
+ emitInstruction(*I, &II->get(X86::POP32r));
+ NumEmitted++; // Keep track of the # of mi's emitted
+ }
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+/// emitPCRelativeBlockAddress - This method keeps track of the information
+/// necessary to resolve the address of this block later and emits a dummy
+/// value.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
+ // Remember where this reference was and where it is to so we can
+ // deal with it later.
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ X86::reloc_pcrel_word, MBB));
+ MCE.emitWordLE(0);
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream assuming
+/// this is part of a "take the address of a global" instruction.
+///
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
+ intptr_t Disp /* = 0 */,
+ intptr_t PCAdj /* = 0 */,
+ bool NeedStub /* = false */,
+ bool Indirect /* = false */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MachineRelocation MR = Indirect
+ ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+ GV, RelocCST, NeedStub)
+ : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ GV, RelocCST, NeedStub);
+ MCE.addRelocation(MR);
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(Disp);
+ else
+ MCE.emitWordLE((int32_t)Disp);
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES,
+ unsigned Reloc) {
+ intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0;
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES, RelocCST));
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(0);
+ else
+ MCE.emitWordLE(0);
+}
+
+/// emitConstPoolAddress - Arrange for the address of a constant pool entry
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, unsigned Reloc,
+ intptr_t Disp /* = 0 */,
+ intptr_t PCAdj /* = 0 */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, RelocCST));
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(Disp);
+ else
+ MCE.emitWordLE((int32_t)Disp);
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc,
+ intptr_t PCAdj /* = 0 */) {
+ intptr_t RelocCST = 0;
+ if (Reloc == X86::reloc_picrel_word)
+ RelocCST = PICBaseOffset;
+ else if (Reloc == X86::reloc_pcrel_word)
+ RelocCST = PCAdj;
+ MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ Reloc, JTI, RelocCST));
+ // The relocated value will be added to the displacement
+ if (Reloc == X86::reloc_absolute_dword)
+ MCE.emitDWordLE(0);
+ else
+ MCE.emitWordLE(0);
+}
+
+template<class CodeEmitter>
+unsigned Emitter<CodeEmitter>::getX86RegNum(unsigned RegNo) const {
+ return II->getRegisterInfo().getX86RegNum(RegNo);
+}
+
+inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
+ unsigned RM) {
+ assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
+ return RM | (RegOpcode << 3) | (Mod << 6);
+}
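+
+// Worked example (illustrative only): ModRMByte(3, 0, 1) packs mod=11,
+// reg/opcode=000, r/m=001 into 0xC1; after opcode 0x81 (group 1, /0 = ADD)
+// that byte selects "add ecx, imm32", since getX86RegNum(ECX) == 1.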
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg,
+ unsigned RegOpcodeFld){
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitRegModRMByte(unsigned RegOpcodeFld) {
+ MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitSIBByte(unsigned SS,
+ unsigned Index,
+ unsigned Base) {
+ // SIB byte is in the same format as the ModRMByte...
+ MCE.emitByte(ModRMByte(SS, Index, Base));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitConstant(uint64_t Val, unsigned Size) {
+ // Output the constant in little endian byte order...
+ for (unsigned i = 0; i != Size; ++i) {
+ MCE.emitByte(Val & 255);
+ Val >>= 8;
+ }
+}
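+
+// For example (illustrative only), emitConstant(0x12345678, 4) emits the
+// bytes 0x78, 0x56, 0x34, 0x12 in that order.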
+
+/// isDisp8 - Return true if this signed displacement fits in an 8-bit
+/// sign-extended field.
+static bool isDisp8(int Value) {
+ return Value == (signed char)Value;
+}
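+
+// For example (illustrative only): isDisp8(127) and isDisp8(-128) are true,
+// while isDisp8(128) is false, so a displacement of 128 needs a full disp32.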
+
+template<class CodeEmitter>
+bool Emitter<CodeEmitter>::gvNeedsNonLazyPtr(const GlobalValue *GV) {
+ // For Darwin, simulate the linktime GOT by using the same non-lazy-pointer
+ // mechanism as 32-bit mode.
+ return (!Is64BitMode || TM.getSubtarget<X86Subtarget>().isTargetDarwin()) &&
+ TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false);
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
+ int DispVal, intptr_t PCAdj) {
+ // If this is a simple integer displacement that doesn't require a relocation,
+ // emit it now.
+ if (!RelocOp) {
+ emitConstant(DispVal, 4);
+ return;
+ }
+
+ // Otherwise, this is something that requires a relocation. Emit it as such
+ // now.
+ if (RelocOp->isGlobal()) {
+ // In 64-bit static small code model, we could potentially emit absolute.
+ // But it's probably not beneficial.
+ // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
+ // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ bool NeedStub = isa<Function>(RelocOp->getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(RelocOp->getGlobal());
+ emitGlobalAddress(RelocOp->getGlobal(), rt, RelocOp->getOffset(),
+ PCAdj, NeedStub, Indirect);
+ } else if (RelocOp->isCPI()) {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word;
+ emitConstPoolAddress(RelocOp->getIndex(), rt,
+ RelocOp->getOffset(), PCAdj);
+ } else if (RelocOp->isJTI()) {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word;
+ emitJumpTableAddress(RelocOp->getIndex(), rt, PCAdj);
+ } else {
+ assert(0 && "Unknown value to relocate!");
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
+ unsigned Op, unsigned RegOpcodeField,
+ intptr_t PCAdj) {
+ const MachineOperand &Op3 = MI.getOperand(Op+3);
+ int DispVal = 0;
+ const MachineOperand *DispForReloc = 0;
+
+ // Figure out what sort of displacement we have to handle here.
+ if (Op3.isGlobal()) {
+ DispForReloc = &Op3;
+ } else if (Op3.isCPI()) {
+ if (Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex());
+ DispVal += Op3.getOffset();
+ }
+ } else if (Op3.isJTI()) {
+ if (Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex());
+ }
+ } else {
+ DispVal = Op3.getImm();
+ }
+
+ const MachineOperand &Base = MI.getOperand(Op);
+ const MachineOperand &Scale = MI.getOperand(Op+1);
+ const MachineOperand &IndexReg = MI.getOperand(Op+2);
+
+ unsigned BaseReg = Base.getReg();
+
+ // Is a SIB byte needed?
+ if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
+ IndexReg.getReg() == 0 &&
+ (BaseReg == 0 || getX86RegNum(BaseReg) != N86::ESP)) {
+ if (BaseReg == 0) { // Just a displacement?
+ // Emit special case [disp32] encoding
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
+
+ emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ } else {
+ unsigned BaseRegNo = getX86RegNum(BaseReg);
+ if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
+ // Emit simple indirect register encoding, e.g. [EAX].
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
+ } else if (!DispForReloc && isDisp8(DispVal)) {
+ // Emit the disp8 encoding... [REG+disp8]
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo));
+ emitConstant(DispVal, 1);
+ } else {
+ // Emit the most general non-SIB encoding: [REG+disp32]
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
+ emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ }
+ }
+
+ } else { // We need a SIB byte, so start by outputting the ModR/M byte first
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+
+ bool ForceDisp32 = false;
+ bool ForceDisp8 = false;
+ if (BaseReg == 0) {
+ // If there is no base register, we emit the special case SIB byte with
+ // MOD=0, BASE=5, to JUST get the index, scale, and displacement.
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispForReloc) {
+ // Emit the normal disp32 encoding.
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ ForceDisp32 = true;
+ } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) {
+ // Emit no displacement ModR/M byte
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
+ } else if (isDisp8(DispVal)) {
+ // Emit the disp8 encoding...
+ MCE.emitByte(ModRMByte(1, RegOpcodeField, 4));
+ ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP
+ } else {
+ // Emit the normal disp32 encoding...
+ MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
+ }
+
+ // Calculate what the SS field value should be...
+ static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
+ unsigned SS = SSTable[Scale.getImm()];
+
+ if (BaseReg == 0) {
+ // Handle the SIB byte for the case where there is no base. The
+ // displacement has already been output.
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = getX86RegNum(IndexReg.getReg());
+ else
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ emitSIBByte(SS, IndexRegNo, 5);
+ } else {
+ unsigned BaseRegNo = getX86RegNum(BaseReg);
+ unsigned IndexRegNo;
+ if (IndexReg.getReg())
+ IndexRegNo = getX86RegNum(IndexReg.getReg());
+ else
+ IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
+ emitSIBByte(SS, IndexRegNo, BaseRegNo);
+ }
+
+ // Do we need to output a displacement?
+ if (ForceDisp8) {
+ emitConstant(DispVal, 1);
+ } else if (DispVal != 0 || ForceDisp32) {
+ emitDisplacementField(DispForReloc, DispVal, PCAdj);
+ }
+ }
+}
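+
+// Worked example (illustrative only, assuming RegOpcodeField == 0): a memory
+// operand [EBX + 4*ESI + 12] takes the SIB path above and emits
+// ModRM = 0x44 (mod=01, rm=100 means a SIB byte follows), SIB = 0xB3
+// (SS=10 for scale 4, index=ESI=110, base=EBX=011), then the disp8 byte 0x0C.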
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitInstruction(
+ const MachineInstr &MI,
+ const TargetInstrDesc *Desc) {
+ DOUT << MI;
+
+ unsigned Opcode = Desc->Opcode;
+
+ // Emit the lock opcode prefix as needed.
+ if (Desc->TSFlags & X86II::LOCK) MCE.emitByte(0xF0);
+
+ // Emit segment override opcode prefix as needed.
+ switch (Desc->TSFlags & X86II::SegOvrMask) {
+ case X86II::FS:
+ MCE.emitByte(0x64);
+ break;
+ case X86II::GS:
+ MCE.emitByte(0x65);
+ break;
+ default: assert(0 && "Invalid segment!");
+ case 0: break; // No segment override!
+ }
+
+ // Emit the repeat opcode prefix as needed.
+ if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3);
+
+ // Emit the operand size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::OpSize) MCE.emitByte(0x66);
+
+ // Emit the address size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::AdSize) MCE.emitByte(0x67);
+
+ bool Need0FPrefix = false;
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::TB: // Two-byte opcode prefix
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ Need0FPrefix = true;
+ break;
+ case X86II::REP: break; // already handled.
+ case X86II::XS: // F3 0F
+ MCE.emitByte(0xF3);
+ Need0FPrefix = true;
+ break;
+ case X86II::XD: // F2 0F
+ MCE.emitByte(0xF2);
+ Need0FPrefix = true;
+ break;
+ case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
+ case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
+ MCE.emitByte(0xD8+
+ (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
+ >> X86II::Op0Shift));
+ break; // Two-byte opcode prefix
+ default: assert(0 && "Invalid prefix!");
+ case 0: break; // No prefix!
+ }
+
+ if (Is64BitMode) {
+ // REX prefix
+ unsigned REX = X86InstrInfo::determineREX(MI);
+ if (REX)
+ MCE.emitByte(0x40 | REX);
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ MCE.emitByte(0x0F);
+
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::T8: // 0F 38
+ MCE.emitByte(0x38);
+ break;
+ case X86II::TA: // 0F 3A
+ MCE.emitByte(0x3A);
+ break;
+ }
+
+ // If this is a two-address instruction, skip one of the register operands.
+ unsigned NumOps = Desc->getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
+ ++CurOp;
+ else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
+ // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
+ --NumOps;
+
+ unsigned char BaseOpcode = II->getBaseOpcodeFor(Desc);
+ switch (Desc->TSFlags & X86II::FormMask) {
+ default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
+ case X86II::Pseudo:
+ // Remember the current PC offset, this is the PIC relocation
+ // base address.
+ switch (Opcode) {
+ default:
+ assert(0 && "pseudo instructions should be removed before code emission");
+ break;
+ case TargetInstrInfo::INLINEASM: {
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI.getOperand(0).getSymbolName()[0]) {
+ assert(0 && "JIT does not support inline asm!\n");
+ abort();
+ }
+ break;
+ }
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getImm());
+ break;
+ case TargetInstrInfo::IMPLICIT_DEF:
+ case TargetInstrInfo::DECLARE:
+ case X86::DWARF_LOC:
+ case X86::FP_REG_KILL:
+ break;
+ case X86::MOVPC32r: {
+ // This emits the "call" portion of this pseudo instruction.
+ MCE.emitByte(BaseOpcode);
+ emitConstant(0, X86InstrInfo::sizeOfImm(Desc));
+ // Remember PIC base.
+ PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset();
+ X86JITInfo *JTI = TM.getJITInfo();
+ JTI->setPICBase(MCE.getCurrentPCValue());
+ break;
+ }
+ }
+ CurOp = NumOps;
+ break;
+ case X86II::RawFrm:
+ MCE.emitByte(BaseOpcode);
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+
+ DOUT << "RawFrm CurOp " << CurOp << "\n";
+ DOUT << "isMBB " << MO.isMBB() << "\n";
+ DOUT << "isGlobal " << MO.isGlobal() << "\n";
+ DOUT << "isSymbol " << MO.isSymbol() << "\n";
+ DOUT << "isImm " << MO.isImm() << "\n";
+
+ if (MO.isMBB()) {
+ emitPCRelativeBlockAddress(MO.getMBB());
+ } else if (MO.isGlobal()) {
+ // Assume undefined functions may be outside the Small codespace.
+ bool NeedStub =
+ (Is64BitMode &&
+ (TM.getCodeModel() == CodeModel::Large ||
+ TM.getSubtarget<X86Subtarget>().isTargetDarwin())) ||
+ Opcode == X86::TAILJMPd;
+ emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
+ MO.getOffset(), 0, NeedStub);
+ } else if (MO.isSymbol()) {
+ emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
+ } else if (MO.isImm()) {
+ if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
+ // Fix up immediate operand for pc relative calls.
+ intptr_t Imm = (intptr_t)MO.getImm();
+ Imm = Imm - MCE.getCurrentPCValue() - 4;
+ emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc));
+ } else
+ emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc));
+ } else {
+ assert(0 && "Unknown RawFrm operand!");
+ }
+ }
+ break;
+
+ case X86II::AddRegFrm:
+ MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO1.isImm())
+ emitConstant(MO1.getImm(), Size);
+ else {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ // This should not occur on Darwin for relocatable objects.
+ if (Opcode == X86::MOV64ri)
+ rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool NeedStub = isa<Function>(MO1.getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal());
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ NeedStub, Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
+ }
+ }
+ break;
+
+ case X86II::MRMDestReg: {
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ getX86RegNum(MI.getOperand(CurOp+1).getReg()));
+ CurOp += 2;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc));
+ break;
+ }
+ case X86II::MRMDestMem: {
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp,
+ getX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)
+ .getReg()));
+ CurOp += X86AddrNumOperands + 1;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc));
+ break;
+ }
+
+ case X86II::MRMSrcReg:
+ MCE.emitByte(BaseOpcode);
+ emitRegModRMByte(MI.getOperand(CurOp+1).getReg(),
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
+ CurOp += 2;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86InstrInfo::sizeOfImm(Desc));
+ break;
+
+ case X86II::MRMSrcMem: {
+ // FIXME: Maybe lea should have its own form?
+ int AddrOperands;
+ if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ AddrOperands = X86AddrNumOperands - 1; // No segment register
+ else
+ AddrOperands = X86AddrNumOperands;
+
+ intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
+ X86InstrInfo::sizeOfImm(Desc) : 0;
+
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()),
+ PCAdj);
+ CurOp += AddrOperands + 1;
+ if (CurOp != NumOps)
+ emitConstant(MI.getOperand(CurOp++).getImm(),
+ X86InstrInfo::sizeOfImm(Desc));
+ break;
+ }
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r: {
+ MCE.emitByte(BaseOpcode);
+
+ // Special handling of lfence, mfence, monitor, and mwait.
+ if (Desc->getOpcode() == X86::LFENCE ||
+ Desc->getOpcode() == X86::MFENCE ||
+ Desc->getOpcode() == X86::MONITOR ||
+ Desc->getOpcode() == X86::MWAIT) {
+ emitRegModRMByte((Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
+
+ switch (Desc->getOpcode()) {
+ default: break;
+ case X86::MONITOR:
+ MCE.emitByte(0xC8);
+ break;
+ case X86::MWAIT:
+ MCE.emitByte(0xC9);
+ break;
+ }
+ } else {
+ emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
+ (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
+ }
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO1.isImm())
+ emitConstant(MO1.getImm(), Size);
+ else {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64ri32)
+ rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
+ if (MO1.isGlobal()) {
+ bool NeedStub = isa<Function>(MO1.getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal());
+ emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+ NeedStub, Indirect);
+ } else if (MO1.isSymbol())
+ emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+ else if (MO1.isCPI())
+ emitConstPoolAddress(MO1.getIndex(), rt);
+ else if (MO1.isJTI())
+ emitJumpTableAddress(MO1.getIndex(), rt);
+ }
+ }
+ break;
+ }
+
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ intptr_t PCAdj = (CurOp + X86AddrNumOperands != NumOps) ?
+ (MI.getOperand(CurOp+X86AddrNumOperands).isImm() ?
+ X86InstrInfo::sizeOfImm(Desc) : 4) : 0;
+
+ MCE.emitByte(BaseOpcode);
+ emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m,
+ PCAdj);
+ CurOp += X86AddrNumOperands;
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO.isImm())
+ emitConstant(MO.getImm(), Size);
+ else {
+ unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+ : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+ if (Opcode == X86::MOV64mi32)
+ rt = X86::reloc_absolute_word; // FIXME: add X86II flag?
+ if (MO.isGlobal()) {
+ bool NeedStub = isa<Function>(MO.getGlobal());
+ bool Indirect = gvNeedsNonLazyPtr(MO.getGlobal());
+ emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
+ NeedStub, Indirect);
+ } else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), rt);
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), rt);
+ else if (MO.isJTI())
+ emitJumpTableAddress(MO.getIndex(), rt);
+ }
+ }
+ break;
+ }
+
+ case X86II::MRMInitReg:
+ MCE.emitByte(BaseOpcode);
+ // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
+ emitRegModRMByte(MI.getOperand(CurOp).getReg(),
+ getX86RegNum(MI.getOperand(CurOp).getReg()));
+ ++CurOp;
+ break;
+ }
+
+ if (!Desc->isVariadic() && CurOp != NumOps) {
+ cerr << "Cannot encode: ";
+ MI.dump();
+ cerr << '\n';
+ abort();
+ }
+}
+
diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm
new file mode 100644
index 000000000000..8002f98765f0
--- /dev/null
+++ b/lib/Target/X86/X86CompilationCallback_Win64.asm
@@ -0,0 +1,67 @@
+;;===-- X86CompilationCallback_Win64.asm - Implement Win64 JIT callback ---===
+;;
+;; The LLVM Compiler Infrastructure
+;;
+;; This file is distributed under the University of Illinois Open Source
+;; License. See LICENSE.TXT for details.
+;;
+;;===----------------------------------------------------------------------===
+;;
+;; This file implements the JIT interfaces for the X86 target.
+;;
+;;===----------------------------------------------------------------------===
+
+extrn X86CompilationCallback2: PROC
+
+.code
+X86CompilationCallback proc
+ push rbp
+
+ ; Save RSP
+ mov rbp, rsp
+
+ ; Save all int arg registers
+ push rcx
+ push rdx
+ push r8
+ push r9
+
+ ; Align stack on 16-byte boundary.
+ and rsp, -16
+
+ ; Save all XMM arg registers
+ sub rsp, 64
+ movaps [rsp], xmm0
+ movaps [rsp+16], xmm1
+ movaps [rsp+32], xmm2
+ movaps [rsp+48], xmm3
+
+ ; JIT callee
+
+ ; Pass prev frame and return address
+ mov rcx, rbp
+ mov rdx, qword ptr [rbp+8]
+ call X86CompilationCallback2
+
+ ; Restore all XMM arg registers
+ movaps xmm3, [rsp+48]
+ movaps xmm2, [rsp+32]
+ movaps xmm1, [rsp+16]
+ movaps xmm0, [rsp]
+
+ ; Restore RSP
+ mov rsp, rbp
+
+ ; Restore all int arg registers. RSP was reset to RBP, which was captured
+ ; before these registers were pushed, so step back down over them first.
+ sub rsp, 32
+ pop r9
+ pop r8
+ pop rdx
+ pop rcx
+
+ ; Restore RBP
+ pop rbp
+ ret
+X86CompilationCallback endp
+
+End
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
new file mode 100644
index 000000000000..4c3cc828f46e
--- /dev/null
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -0,0 +1,18 @@
+//===-- X86ELFWriterInfo.cpp - ELF Writer Info for the X86 backend --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the X86 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ELFWriterInfo.h"
+using namespace llvm;
+
+X86ELFWriterInfo::X86ELFWriterInfo() : TargetELFWriterInfo(EM_386) {}
+X86ELFWriterInfo::~X86ELFWriterInfo() {}
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
new file mode 100644
index 000000000000..06e051a34ac9
--- /dev/null
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -0,0 +1,29 @@
+//===-- X86ELFWriterInfo.h - ELF Writer Info for X86 ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF writer information for the X86 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_ELF_WRITER_INFO_H
+#define X86_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+ class X86ELFWriterInfo : public TargetELFWriterInfo {
+ public:
+ X86ELFWriterInfo();
+ virtual ~X86ELFWriterInfo();
+ };
+
+} // end llvm namespace
+
+#endif // X86_ELF_WRITER_INFO_H
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
new file mode 100644
index 000000000000..b3667be0d035
--- /dev/null
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -0,0 +1,1549 @@
+//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86-specific support for the FastISel class. Much
+// of the target-specific code is generated by tablegen in the file
+// X86GenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+
+class X86FastISel : public FastISel {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ /// StackPtr - Register used as the stack pointer.
+ ///
+ unsigned StackPtr;
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// floating point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf64;
+ bool X86ScalarSSEf32;
+
+public:
+ explicit X86FastISel(MachineFunction &mf,
+ MachineModuleInfo *mmi,
+ DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &vm,
+ DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
+ DenseMap<const AllocaInst *, int> &am
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &cil
+#endif
+ )
+ : FastISel(mf, mmi, dw, vm, bm, am
+#ifndef NDEBUG
+ , cil
+#endif
+ ) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
+ }
+
+ virtual bool TargetSelectInstruction(Instruction *I);
+
+#include "X86GenFastISel.inc"
+
+private:
+ bool X86FastEmitCompare(Value *LHS, Value *RHS, MVT VT);
+
+ bool X86FastEmitLoad(MVT VT, const X86AddressMode &AM, unsigned &RR);
+
+ bool X86FastEmitStore(MVT VT, Value *Val,
+ const X86AddressMode &AM);
+ bool X86FastEmitStore(MVT VT, unsigned Val,
+ const X86AddressMode &AM);
+
+ bool X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, unsigned Src, MVT SrcVT,
+ unsigned &ResultReg);
+
+ bool X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall);
+
+ bool X86SelectLoad(Instruction *I);
+
+ bool X86SelectStore(Instruction *I);
+
+ bool X86SelectCmp(Instruction *I);
+
+ bool X86SelectZExt(Instruction *I);
+
+ bool X86SelectBranch(Instruction *I);
+
+ bool X86SelectShift(Instruction *I);
+
+ bool X86SelectSelect(Instruction *I);
+
+ bool X86SelectTrunc(Instruction *I);
+
+ bool X86SelectFPExt(Instruction *I);
+ bool X86SelectFPTrunc(Instruction *I);
+
+ bool X86SelectExtractValue(Instruction *I);
+
+ bool X86VisitIntrinsicCall(IntrinsicInst &I);
+ bool X86SelectCall(Instruction *I);
+
+ CCAssignFn *CCAssignFnForCall(unsigned CC, bool isTailCall = false);
+
+ const X86InstrInfo *getInstrInfo() const {
+ return getTargetMachine()->getInstrInfo();
+ }
+ const X86TargetMachine *getTargetMachine() const {
+ return static_cast<const X86TargetMachine *>(&TM);
+ }
+
+ unsigned TargetMaterializeConstant(Constant *C);
+
+ unsigned TargetMaterializeAlloca(AllocaInst *C);
+
+ /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
+ /// computed in an SSE register, not on the X87 floating point stack.
+ bool isScalarFPTypeInSSEReg(MVT VT) const {
+ return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
+ (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
+ }
+
+ bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+};
+
+} // end anonymous namespace.
+
+bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
+ VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // For now, require SSE/SSE2 for performing floating-point operations,
+ // since x87 requires additional work.
+ if (VT == MVT::f64 && !X86ScalarSSEf64)
+ return false;
+ if (VT == MVT::f32 && !X86ScalarSSEf32)
+ return false;
+ // Similarly, no f80 support yet.
+ if (VT == MVT::f80)
+ return false;
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
+}
+
+#include "X86GenCallingConv.inc"
+
+/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
+/// convention.
+CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTailCall) {
+ if (Subtarget->is64Bit()) {
+ if (Subtarget->isTargetWin64())
+ return CC_X86_Win64_C;
+ else if (CC == CallingConv::Fast && isTailCall)
+ return CC_X86_64_TailCall;
+ else
+ return CC_X86_64_C;
+ }
+
+ if (CC == CallingConv::X86_FastCall)
+ return CC_X86_32_FastCall;
+ else if (CC == CallingConv::Fast)
+ return CC_X86_32_FastCC;
+ else
+ return CC_X86_32_C;
+}
+
+/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
+/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
+/// Return true and the result register by reference if it is possible.
+bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM,
+ unsigned &ResultReg) {
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.getSimpleVT()) {
+ default: return false;
+ case MVT::i8:
+ Opc = X86::MOV8rm;
+ RC = X86::GR8RegisterClass;
+ break;
+ case MVT::i16:
+ Opc = X86::MOV16rm;
+ RC = X86::GR16RegisterClass;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ if (Subtarget->hasSSE1()) {
+ Opc = X86::MOVSSrm;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (Subtarget->hasSSE2()) {
+ Opc = X86::MOVSDrm;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+ return true;
+}
+
+/// X86FastEmitStore - Emit a machine instruction to store a value Val of
+/// type VT. The address is either pre-computed, consisting of a base pointer,
+/// Ptr, and a displacement offset, or a GlobalAddress, i.e. V. Return true if
+/// it is possible.
+bool
+X86FastISel::X86FastEmitStore(MVT VT, unsigned Val,
+ const X86AddressMode &AM) {
+ // Get opcode and regclass of the output for the given store instruction.
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT()) {
+ case MVT::f80: // No f80 support yet.
+ default: return false;
+ case MVT::i8: Opc = X86::MOV8mr; break;
+ case MVT::i16: Opc = X86::MOV16mr; break;
+ case MVT::i32: Opc = X86::MOV32mr; break;
+ case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
+ case MVT::f32:
+ Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
+ break;
+ case MVT::f64:
+ Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
+ break;
+ }
+
+ addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val);
+ return true;
+}
+
+bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val,
+ const X86AddressMode &AM) {
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Val))
+ Val = Constant::getNullValue(TD.getIntPtrType());
+
+ // If this is a store of a simple constant, fold the constant into the store.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+ unsigned Opc = 0;
+ switch (VT.getSimpleVT()) {
+ default: break;
+ case MVT::i8: Opc = X86::MOV8mi; break;
+ case MVT::i16: Opc = X86::MOV16mi; break;
+ case MVT::i32: Opc = X86::MOV32mi; break;
+ case MVT::i64:
+ // Must be a 32-bit sign extended value.
+ if ((int)CI->getSExtValue() == CI->getSExtValue())
+ Opc = X86::MOV64mi32;
+ break;
+ }
+
+ if (Opc) {
+ addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
+ .addImm(CI->getSExtValue());
+ return true;
+ }
+ }
+
+ unsigned ValReg = getRegForValue(Val);
+ if (ValReg == 0)
+ return false;
+
+ return X86FastEmitStore(VT, ValReg, AM);
+}
+
+/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
+/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
+/// ISD::SIGN_EXTEND).
+bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT,
+ unsigned Src, MVT SrcVT,
+ unsigned &ResultReg) {
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
+
+ if (RR != 0) {
+ ResultReg = RR;
+ return true;
+ } else
+ return false;
+}
+
+/// X86SelectAddress - Attempt to fill in an address from the given value.
+///
+bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
+ User *U;
+ unsigned Opcode = Instruction::UserOp1;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts.
+ return X86SelectAddress(U->getOperand(0), AM, isCall);
+
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM, isCall);
+ break;
+
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return X86SelectAddress(U->getOperand(0), AM, isCall);
+ break;
+
+ case Instruction::Alloca: {
+ if (isCall) break;
+ // Do static allocas.
+ const AllocaInst *A = cast<AllocaInst>(V);
+ DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
+ if (SI != StaticAllocaMap.end()) {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = SI->second;
+ return true;
+ }
+ break;
+ }
+
+ case Instruction::Add: {
+ if (isCall) break;
+ // Adds of constants are common and easy enough.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
+ // They have to fit in the 32-bit signed displacement field though.
+ if (isInt32(Disp)) {
+ AM.Disp = (uint32_t)Disp;
+ return X86SelectAddress(U->getOperand(0), AM, isCall);
+ }
+ }
+ break;
+ }
+
+ case Instruction::GetElementPtr: {
+ if (isCall) break;
+ // Pattern-match simple GEPs.
+ uint64_t Disp = (int32_t)AM.Disp;
+ unsigned IndexReg = AM.IndexReg;
+ unsigned Scale = AM.Scale;
+ gep_type_iterator GTI = gep_type_begin(U);
+ // Iterate through the indices, folding what we can. Constants can be
+ // folded, and one dynamic index can be handled, if the scale is supported.
+ for (User::op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ Value *Op = *i;
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ Disp += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ Disp += CI->getSExtValue() * S;
+ } else if (IndexReg == 0 &&
+ (!AM.GV ||
+ !getTargetMachine()->symbolicAddressesAreRIPRel()) &&
+ (S == 1 || S == 2 || S == 4 || S == 8)) {
+ // Scaled-index addressing.
+ Scale = S;
+ IndexReg = getRegForGEPIndex(Op);
+ if (IndexReg == 0)
+ return false;
+ } else
+ // Unsupported.
+ goto unsupported_gep;
+ }
+ }
+ // Check for displacement overflow.
+ if (!isInt32(Disp))
+ break;
+ // Ok, the GEP indices were covered by constant-offset and scaled-index
+ // addressing. Update the address state and move on to examining the base.
+ AM.IndexReg = IndexReg;
+ AM.Scale = Scale;
+ AM.Disp = (uint32_t)Disp;
+ return X86SelectAddress(U->getOperand(0), AM, isCall);
+ unsupported_gep:
+ // Ok, the GEP indices weren't all covered.
+ break;
+ }
+ }
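+
+  // Illustration of the GEP folding above (hypothetical IR, not code in this
+  // function): "getelementptr i32* %p, i64 3" simply adds 3*4 = 12 to Disp,
+  // while "getelementptr i32* %p, i64 %i" sets Scale = 4 and IndexReg to the
+  // register holding %i via getRegForGEPIndex.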
+
+ // Handle constant address.
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Default &&
+ TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // RIP-relative addresses can't have additional register operands.
+ if (getTargetMachine()->symbolicAddressesAreRIPRel() &&
+ (AM.Base.Reg != 0 || AM.IndexReg != 0))
+ return false;
+
+ // Can't handle TLS yet.
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // Set up the basic address.
+ AM.GV = GV;
+ if (!isCall &&
+ TM.getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->is64Bit())
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
+
+ // Emit an extra load if the ABI requires it.
+ if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) {
+ // Check to see if we've already materialized this
+ // value in a register in this block.
+ if (unsigned Reg = LocalValueMap[V]) {
+ AM.Base.Reg = Reg;
+ AM.GV = 0;
+ return true;
+ }
+ // Issue load from stub if necessary.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (TLI.getPointerTy() == MVT::i32) {
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ } else {
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+ }
+
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = AM.GV;
+ unsigned ResultReg = createResultReg(RC);
+ addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), StubAM);
+
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = ResultReg;
+ AM.GV = 0;
+
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = AM.Base.Reg;
+ }
+ return true;
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !getTargetMachine()->symbolicAddressesAreRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
+/// X86SelectStore - Select and emit code to implement store instructions.
+bool X86FastISel::X86SelectStore(Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(I->getOperand(1), AM, false))
+ return false;
+
+ return X86FastEmitStore(VT, I->getOperand(0), AM);
+}
+
+/// X86SelectLoad - Select and emit code to implement load instructions.
+///
+bool X86FastISel::X86SelectLoad(Instruction *I) {
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(I->getOperand(0), AM, false))
+ return false;
+
+ unsigned ResultReg = 0;
+ if (X86FastEmitLoad(VT, AM, ResultReg)) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ return false;
+}
+
+static unsigned X86ChooseCmpOpcode(MVT VT) {
+ switch (VT.getSimpleVT()) {
+ default: return 0;
+ case MVT::i8: return X86::CMP8rr;
+ case MVT::i16: return X86::CMP16rr;
+ case MVT::i32: return X86::CMP32rr;
+ case MVT::i64: return X86::CMP64rr;
+ case MVT::f32: return X86::UCOMISSrr;
+ case MVT::f64: return X86::UCOMISDrr;
+ }
+}
+
+/// X86ChooseCmpImmediateOpcode - If the RHS of a comparison is the constant
+/// RHSC, return an opcode that can fold the immediate into the compare
+/// (e.g. CMP32ri); otherwise return 0.
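+/// For example, an i32 compare against any ConstantInt can use CMP32ri, while
+/// an i64 compare can use CMP64ri32 only when the constant sign-extends from
+/// 32 bits; otherwise the caller falls back to a register-register compare.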
+static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) {
+ switch (VT.getSimpleVT()) {
+ // Otherwise, we can't fold the immediate into this comparison.
+ default: return 0;
+ case MVT::i8: return X86::CMP8ri;
+ case MVT::i16: return X86::CMP16ri;
+ case MVT::i32: return X86::CMP32ri;
+ case MVT::i64:
+ // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
+ // field.
+ if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
+ return X86::CMP64ri32;
+ return 0;
+ }
+}
+
+bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) {
+ unsigned Op0Reg = getRegForValue(Op0);
+ if (Op0Reg == 0) return false;
+
+ // Handle 'null' like i32/i64 0.
+ if (isa<ConstantPointerNull>(Op1))
+ Op1 = Constant::getNullValue(TD.getIntPtrType());
+
+ // We have two options: compare with register or immediate. If the RHS of
+ // the compare is an immediate that we can fold into this compare, use
+ // CMPri, otherwise use CMPrr.
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
+ BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg)
+ .addImm(Op1C->getSExtValue());
+ return true;
+ }
+ }
+
+ unsigned CompareOpc = X86ChooseCmpOpcode(VT);
+ if (CompareOpc == 0) return false;
+
+ unsigned Op1Reg = getRegForValue(Op1);
+ if (Op1Reg == 0) return false;
+ BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg);
+
+ return true;
+}
+
+bool X86FastISel::X86SelectCmp(Instruction *I) {
+ CmpInst *CI = cast<CmpInst>(I);
+
+ MVT VT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ unsigned ResultReg = createResultReg(&X86::GR8RegClass);
+ unsigned SetCCOpc;
+ bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
+ switch (CI->getPredicate()) {
+ case CmpInst::FCMP_OEQ: {
+ if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ return false;
+
+ unsigned EReg = createResultReg(&X86::GR8RegClass);
+ unsigned NPReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(MBB, DL, TII.get(X86::SETEr), EReg);
+ BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg);
+ BuildMI(MBB, DL,
+ TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ case CmpInst::FCMP_UNE: {
+ if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ return false;
+
+ unsigned NEReg = createResultReg(&X86::GR8RegClass);
+ unsigned PReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg);
+ BuildMI(MBB, DL, TII.get(X86::SETPr), PReg);
+ BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
+ case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break;
+ case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
+ case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
+ case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break;
+ case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
+ case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break;
+ case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break;
+ case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
+ case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
+
+ case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
+ case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
+ case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
+ case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
+ case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
+ case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
+ case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break;
+ case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
+ case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break;
+ case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
+ default:
+ return false;
+ }
+
+ Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if (SwapArgs)
+ std::swap(Op0, Op1);
+
+ // Emit a compare of Op0/Op1.
+ if (!X86FastEmitCompare(Op0, Op1, VT))
+ return false;
+
+ BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectZExt(Instruction *I) {
+ // Handle zero-extension from i1 to i8, which is common.
+ if (I->getType() == Type::Int8Ty &&
+ I->getOperand(0)->getType() == Type::Int1Ty) {
+ unsigned ResultReg = getRegForValue(I->getOperand(0));
+ if (ResultReg == 0) return false;
+ // Set the high bits to zero.
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg);
+ if (ResultReg == 0) return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ return false;
+}
+
+
+bool X86FastISel::X86SelectBranch(Instruction *I) {
+ // Unconditional branches are selected by tablegen-generated code.
+ // Handle a conditional branch.
+ BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];
+
+ // Fold the common case of a conditional branch with a comparison.
+ if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ if (CI->hasOneUse()) {
+ MVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+
+ // Try to take advantage of fallthrough opportunities.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
+ unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
+
+ switch (Predicate) {
+ case CmpInst::FCMP_OEQ:
+ std::swap(TrueMBB, FalseMBB);
+ Predicate = CmpInst::FCMP_UNE;
+ // FALL THROUGH
+ case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break;
+ case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA; break;
+ case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break;
+ case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA; break;
+ case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE; break;
+ case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break;
+ case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break;
+ case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP; break;
+ case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE; break;
+ case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB; break;
+ case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE; break;
+ case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break;
+ case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;
+
+ case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE; break;
+ case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE; break;
+ case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA; break;
+ case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break;
+ case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break;
+ case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;
+ case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG; break;
+ case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break;
+ case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL; break;
+ case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break;
+ default:
+ return false;
+ }
+
+ Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if (SwapArgs)
+ std::swap(Op0, Op1);
+
+ // Emit a compare of the LHS and RHS, setting the flags.
+ if (!X86FastEmitCompare(Op0, Op1, VT))
+ return false;
+
+ BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB);
+
+ if (Predicate == CmpInst::FCMP_UNE) {
+ // X86 requires a second branch to handle UNE (and OEQ,
+ // which is mapped to UNE above).
+ BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB);
+ }
+
+ FastEmitBranch(FalseMBB);
+ MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ } else if (ExtractValueInst *EI =
+ dyn_cast<ExtractValueInst>(BI->getCondition())) {
+ // Check to see if the branch instruction is from an "arithmetic with
+ // overflow" intrinsic. The main way these intrinsics are used is:
+ //
+ // %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
+ // %sum = extractvalue { i32, i1 } %t, 0
+ // %obit = extractvalue { i32, i1 } %t, 1
+ // br i1 %obit, label %overflow, label %normal
+ //
+ // The %sum and %obit are converted into an ADD and a SETO/SETB before
+ // reaching the branch. Therefore, we search backwards through the MBB
+ // looking for the SETO/SETB instruction. If an instruction modifies the
+ // EFLAGS register before we reach the SETO/SETB instruction, then we can't
+ // convert the branch into a JO/JB instruction.
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
+ if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
+ CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
+ const MachineInstr *SetMI = 0;
+ unsigned Reg = lookUpRegForValue(EI);
+
+ for (MachineBasicBlock::const_reverse_iterator
+ RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
+ const MachineInstr &MI = *RI;
+
+ if (MI.modifiesRegister(Reg)) {
+ unsigned Src, Dst, SrcSR, DstSR;
+
+ if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
+ Reg = Src;
+ continue;
+ }
+
+ SetMI = &MI;
+ break;
+ }
+
+ const TargetInstrDesc &TID = MI.getDesc();
+ if (TID.hasUnmodeledSideEffects() ||
+ TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
+ break;
+ }
+
+ if (SetMI) {
+ unsigned OpCode = SetMI->getOpcode();
+
+ if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
+ BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB);
+ MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ }
+ }
+ }
+ }
+
+ // Otherwise do a clumsy setcc and re-test it.
+ unsigned OpReg = getRegForValue(BI->getCondition());
+ if (OpReg == 0) return false;
+
+ BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
+ BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB);
+ MBB->addSuccessor(TrueMBB);
+ return true;
+}
+
+bool X86FastISel::X86SelectShift(Instruction *I) {
+ unsigned CReg = 0, OpReg = 0, OpImm = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (I->getType() == Type::Int8Ty) {
+ CReg = X86::CL;
+ RC = &X86::GR8RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
+ case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
+ case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
+ default: return false;
+ }
+ } else if (I->getType() == Type::Int16Ty) {
+ CReg = X86::CX;
+ RC = &X86::GR16RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
+ case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
+ case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
+ default: return false;
+ }
+ } else if (I->getType() == Type::Int32Ty) {
+ CReg = X86::ECX;
+ RC = &X86::GR32RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
+ case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
+ case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
+ default: return false;
+ }
+ } else if (I->getType() == Type::Int64Ty) {
+ CReg = X86::RCX;
+ RC = &X86::GR64RegClass;
+ switch (I->getOpcode()) {
+ case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
+ case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
+ case Instruction::Shl: OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
+ default: return false;
+ }
+ } else {
+ return false;
+ }
+
+ MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
+ return false;
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0) return false;
+
+ // Fold a constant shift amount, as in shl(x,3).
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(MBB, DL, TII.get(OpImm),
+ ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);
+
+ // The shift instruction uses X86::CL. If we defined a super-register
+ // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
+ // we're doing here.
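+ // For example, a 32-bit shift copies its amount into ECX above, and the
+ // EXTRACT_SUBREG here names CL as the 8-bit sub-register the shift reads.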
+ if (CReg != X86::CL)
+ BuildMI(MBB, DL, TII.get(TargetInstrInfo::EXTRACT_SUBREG), X86::CL)
+ .addReg(CReg).addImm(X86::SUBREG_8BIT);
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectSelect(Instruction *I) {
+ MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
+ return false;
+
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ if (VT.getSimpleVT() == MVT::i16) {
+ Opc = X86::CMOVE16rr;
+ RC = &X86::GR16RegClass;
+ } else if (VT.getSimpleVT() == MVT::i32) {
+ Opc = X86::CMOVE32rr;
+ RC = &X86::GR32RegClass;
+ } else if (VT.getSimpleVT() == MVT::i64) {
+ Opc = X86::CMOVE64rr;
+ RC = &X86::GR64RegClass;
+ } else {
+ return false;
+ }
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ unsigned Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+
+ BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectFPExt(Instruction *I) {
+ // fpext from float to double.
+ if (Subtarget->hasSSE2() && I->getType() == Type::DoubleTy) {
+ Value *V = I->getOperand(0);
+ if (V->getType() == Type::FloatTy) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
+ BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectFPTrunc(Instruction *I) {
+ if (Subtarget->hasSSE2()) {
+ if (I->getType() == Type::FloatTy) {
+ Value *V = I->getOperand(0);
+ if (V->getType() == Type::DoubleTy) {
+ unsigned OpReg = getRegForValue(V);
+ if (OpReg == 0) return false;
+ unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
+ BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86SelectTrunc(Instruction *I) {
+ if (Subtarget->is64Bit())
+ // All other cases should be handled by the tblgen generated code.
+ return false;
+ MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ MVT DstVT = TLI.getValueType(I->getType());
+
+ // This code only handles truncation to byte right now.
+ if (DstVT != MVT::i8 && DstVT != MVT::i1)
+ // All other cases should be handled by the tblgen generated code.
+ return false;
+ if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
+ // All other cases should be handled by the tblgen generated code.
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ // First issue a copy to GR16_ABCD or GR32_ABCD.
+ unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
+ const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
+ ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
+ unsigned CopyReg = createResultReg(CopyRC);
+ BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg);
+
+ // Then issue an extract_subreg.
+ unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
+ CopyReg, X86::SUBREG_8BIT);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool X86FastISel::X86SelectExtractValue(Instruction *I) {
+ ExtractValueInst *EI = cast<ExtractValueInst>(I);
+ Value *Agg = EI->getAggregateOperand();
+
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
+ switch (CI->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ // Cheat a little. We know that the registers for "add" and "seto" are
+ // allocated sequentially. However, we only keep track of the register
+ // for "add" in the value map. Use extractvalue's index to get the
+ // correct register for "seto".
+ UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
+ // FIXME: Handle more intrinsics.
+ switch (I.getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow: {
+ // Replace "add with overflow" intrinsics with an "add" instruction followed
+ // by a seto/setc instruction. Later on, when the "extractvalue"
+ // instructions are encountered, we use the fact that two registers were
+ // created sequentially to get the correct registers for the "sum" and the
+ // "overflow bit".
+ const Function *Callee = I.getCalledFunction();
+ const Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ Value *Op1 = I.getOperand(1);
+ Value *Op2 = I.getOperand(2);
+ unsigned Reg1 = getRegForValue(Op1);
+ unsigned Reg2 = getRegForValue(Op2);
+
+ if (Reg1 == 0 || Reg2 == 0)
+ // FIXME: Handle values *not* in registers.
+ return false;
+
+ unsigned OpC = 0;
+ if (VT == MVT::i32)
+ OpC = X86::ADD32rr;
+ else if (VT == MVT::i64)
+ OpC = X86::ADD64rr;
+ else
+ return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2);
+ unsigned DestReg1 = UpdateValueMap(&I, ResultReg);
+
+ // If the add with overflow is an intra-block value then we just want to
+ // create temporaries for it like normal. If it is a cross-block value then
+ // UpdateValueMap will return the cross-block register used. Since we
+ // *really* want the value to be live in the register pair known by
+ // UpdateValueMap, we have to use DestReg1+1 as the destination register in
+ // the cross block case. In the non-cross-block case, we should just make
+ // another register for the value.
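+ // For example, if UpdateValueMap returned the cross-block register %reg1024
+ // for the intrinsic, the SETO/SETB result below must land in %reg1025 so the
+ // extractvalue lowering (lookUpRegForValue plus the index) can find it.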
+ if (DestReg1 != ResultReg)
+ ResultReg = DestReg1+1;
+ else
+ ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));
+
+ unsigned Opc = X86::SETBr;
+ if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
+ Opc = X86::SETOr;
+ BuildMI(MBB, DL, TII.get(Opc), ResultReg);
+ return true;
+ }
+ }
+}
+
+bool X86FastISel::X86SelectCall(Instruction *I) {
+ CallInst *CI = cast<CallInst>(I);
+ Value *Callee = I->getOperand(0);
+
+ // Can't handle inline asm yet.
+ if (isa<InlineAsm>(Callee))
+ return false;
+
+ // Handle intrinsic calls.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
+ return X86VisitIntrinsicCall(*II);
+
+ // Handle only the C, fastcc, and x86 fastcall calling conventions for now.
+ CallSite CS(CI);
+ unsigned CC = CS.getCallingConv();
+ if (CC != CallingConv::C &&
+ CC != CallingConv::Fast &&
+ CC != CallingConv::X86_FastCall)
+ return false;
+
+ // On X86, -tailcallopt changes the fastcc ABI. FastISel doesn't
+ // handle this for now.
+ if (CC == CallingConv::Fast && PerformTailCallOpt)
+ return false;
+
+ // Let SDISel handle vararg functions.
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ if (FTy->isVarArg())
+ return false;
+
+ // Handle *simple* calls for now.
+ const Type *RetTy = CS.getType();
+ MVT RetVT;
+ if (RetTy == Type::VoidTy)
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT, true))
+ return false;
+
+ // Materialize callee address in a register. FIXME: GV address can be
+ // handled with a CALLpcrel32 instead.
+ X86AddressMode CalleeAM;
+ if (!X86SelectAddress(Callee, CalleeAM, true))
+ return false;
+ unsigned CalleeOp = 0;
+ GlobalValue *GV = 0;
+ if (CalleeAM.Base.Reg != 0) {
+ assert(CalleeAM.GV == 0);
+ CalleeOp = CalleeAM.Base.Reg;
+ } else if (CalleeAM.GV != 0) {
+ assert(CalleeAM.GV != 0);
+ GV = CalleeAM.GV;
+ } else
+ return false;
+
+ // Allow calls which produce i1 results.
+ bool AndToI1 = false;
+ if (RetVT == MVT::i1) {
+ RetVT = MVT::i8;
+ AndToI1 = true;
+ }
+
+ // Deal with call operands first.
+ SmallVector<Value*, 8> ArgVals;
+ SmallVector<unsigned, 8> Args;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(CS.arg_size());
+ ArgVals.reserve(CS.arg_size());
+ ArgVTs.reserve(CS.arg_size());
+ ArgFlags.reserve(CS.arg_size());
+ for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ unsigned Arg = getRegForValue(*i);
+ if (Arg == 0)
+ return false;
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ // FIXME: Only handle *easy* calls for now.
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+ CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+ CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ return false;
+
+ const Type *ArgTy = (*i)->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT))
+ return false;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(Arg);
+ ArgVals.push_back(*i);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, false, TM, ArgLocs);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
+ BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes);
+
+ // Process the arguments: walk the register/memloc assignments, inserting
+ // copies / loads.
+ SmallVector<unsigned, 4> RegArgs;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Arg = Args[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+
+ assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ }
+
+ if (VA.isRegLoc()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
+ bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
+ Arg, RC, RC);
+ assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
+ Emitted = true;
+ RegArgs.push_back(VA.getLocReg());
+ } else {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ X86AddressMode AM;
+ AM.Base.Reg = StackPtr;
+ AM.Disp = LocMemOffset;
+ Value *ArgVal = ArgVals[VA.getValNo()];
+
+ // If this is a really simple value, emit this with the Value* version of
+ // X86FastEmitStore. If it isn't simple, we don't want to do this, as it
+ // can cause us to reevaluate the argument.
+ if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal))
+ X86FastEmitStore(ArgVT, ArgVal, AM);
+ else
+ X86FastEmitStore(ArgVT, Arg, AM);
+ }
+ }
+
+ // ELF / PIC requires the GOT pointer to be in the EBX register before
+ // function calls made via the PLT.
+ if (!Subtarget->is64Bit() &&
+ TM.getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT()) {
+ TargetRegisterClass *RC = X86::GR32RegisterClass;
+ unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
+ bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
+ assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
+ Emitted = true;
+ }
+
+ // Issue the call.
+ unsigned CallOpc = CalleeOp
+ ? (Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r)
+ : (Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32);
+ MachineInstrBuilder MIB = CalleeOp
+ ? BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp)
+ : BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV);
+
+ // Add an implicit use of the GOT pointer in EBX.
+ if (!Subtarget->is64Bit() &&
+ TM.getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ MIB.addReg(X86::EBX);
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+ BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);
+
+ // Now handle call return value (if any).
+ if (RetVT.getSimpleVT() != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, TM, RVLocs);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
+
+ // Copy all of the result registers out of their specified physreg.
+ assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
+ MVT CopyVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+ TargetRegisterClass *SrcRC = DstRC;
+
+ // If this is a call to a function that returns an fp value on the x87 fp
+ // stack, but where we prefer to use the value in xmm registers, copy it
+ // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
+ if ((RVLocs[0].getLocReg() == X86::ST0 ||
+ RVLocs[0].getLocReg() == X86::ST1) &&
+ isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
+ CopyVT = MVT::f80;
+ SrcRC = X86::RSTRegisterClass;
+ DstRC = X86::RFP80RegisterClass;
+ }
+
+ unsigned ResultReg = createResultReg(DstRC);
+ bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
+ RVLocs[0].getLocReg(), DstRC, SrcRC);
+ assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
+ Emitted = true;
+ if (CopyVT != RVLocs[0].getValVT()) {
+ // Round the F80 to the right size, which also moves it to the appropriate xmm
+ // register. This is accomplished by storing the F80 value in memory and
+ // then loading it back. Ewww...
+ MVT ResVT = RVLocs[0].getValVT();
+ unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
+ unsigned MemSize = ResVT.getSizeInBits()/8;
+ int FI = MFI.CreateStackObject(MemSize, MemSize);
+ addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
+ DstRC = ResVT == MVT::f32
+ ? X86::FR32RegisterClass : X86::FR64RegisterClass;
+ Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
+ ResultReg = createResultReg(DstRC);
+ addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI);
+ }
+
+ if (AndToI1) {
+ // Mask out all but the lowest bit for a call which produces an i1.
+ unsigned AndResult = createResultReg(X86::GR8RegisterClass);
+ BuildMI(MBB, DL,
+ TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
+ ResultReg = AndResult;
+ }
+
+ UpdateValueMap(I, ResultReg);
+ }
+
+ return true;
+}
+
+
+bool
+X86FastISel::TargetSelectInstruction(Instruction *I) {
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::Load:
+ return X86SelectLoad(I);
+ case Instruction::Store:
+ return X86SelectStore(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return X86SelectCmp(I);
+ case Instruction::ZExt:
+ return X86SelectZExt(I);
+ case Instruction::Br:
+ return X86SelectBranch(I);
+ case Instruction::Call:
+ return X86SelectCall(I);
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ return X86SelectShift(I);
+ case Instruction::Select:
+ return X86SelectSelect(I);
+ case Instruction::Trunc:
+ return X86SelectTrunc(I);
+ case Instruction::FPExt:
+ return X86SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return X86SelectFPTrunc(I);
+ case Instruction::ExtractValue:
+ return X86SelectExtractValue(I);
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+ MVT DstVT = TLI.getValueType(I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return X86SelectZExt(I);
+ if (DstVT.bitsLT(SrcVT))
+ return X86SelectTrunc(I);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0) return false;
+ UpdateValueMap(I, Reg);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
+ MVT VT;
+ if (!isTypeLegal(C->getType(), VT))
+ return false;
+
+ // Get opcode and regclass of the output for the given load instruction.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.getSimpleVT()) {
+ default: return false;
+ case MVT::i8:
+ Opc = X86::MOV8rm;
+ RC = X86::GR8RegisterClass;
+ break;
+ case MVT::i16:
+ Opc = X86::MOV16rm;
+ RC = X86::GR16RegisterClass;
+ break;
+ case MVT::i32:
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+ break;
+ case MVT::f32:
+ if (Subtarget->hasSSE1()) {
+ Opc = X86::MOVSSrm;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp32m;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (Subtarget->hasSSE2()) {
+ Opc = X86::MOVSDrm;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp64m;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ // Materialize addresses with LEA instructions.
+ if (isa<GlobalValue>(C)) {
+ X86AddressMode AM;
+ if (X86SelectAddress(C, AM, false)) {
+ if (TLI.getPointerTy() == MVT::i32)
+ Opc = X86::LEA32r;
+ else
+ Opc = X86::LEA64r;
+ unsigned ResultReg = createResultReg(RC);
+ addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+ return ResultReg;
+ }
+ return 0;
+ }
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->is64Bit())
+ PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+
+ // Create the load from the constant pool.
+ unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
+ unsigned ResultReg = createResultReg(RC);
+ addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), MCPOffset,
+ PICBase);
+
+ return ResultReg;
+}
+
+unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) {
+ // Fail on dynamic allocas. At this point, getRegForValue has already
+ // checked its CSE maps, so if we're here trying to handle a dynamic
+ // alloca, we're not going to succeed. X86SelectAddress has a
+ // check for dynamic allocas, because it's called directly from
+ // various places, but TargetMaterializeAlloca also needs a check
+ // in order to avoid recursion between getRegForValue,
+ // X86SelectAddress, and TargetMaterializeAlloca.
+ if (!StaticAllocaMap.count(C))
+ return 0;
+
+ X86AddressMode AM;
+ if (!X86SelectAddress(C, AM, false))
+ return 0;
+ unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
+ TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned ResultReg = createResultReg(RC);
+ addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+ return ResultReg;
+}
+
+namespace llvm {
+ llvm::FastISel *X86::createFastISel(MachineFunction &mf,
+ MachineModuleInfo *mmi,
+ DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &vm,
+ DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
+ DenseMap<const AllocaInst *, int> &am
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &cil
+#endif
+ ) {
+ return new X86FastISel(mf, mmi, dw, vm, bm, am
+#ifndef NDEBUG
+ , cil
+#endif
+ );
+ }
+}
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
new file mode 100644
index 000000000000..0f2fbcc98694
--- /dev/null
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -0,0 +1,1187 @@
+//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which converts floating point instructions from
+// virtual registers into register stack instructions. This pass uses live
+// variable information to indicate where the FPn registers are used and their
+// lifetimes.
+//
+// This pass is hampered by the lack of decent CFG manipulation routines for
+// machine code. In particular, this wants to be able to split critical edges
+// as necessary, traverse the machine basic block CFG in depth-first order, and
+// allow there to be multiple machine basic blocks for each LLVM basicblock
+// (needed for critical edge splitting).
+//
+// In particular, this pass currently barfs on critical edges. Because of this,
+// it requires the instruction selector to insert FP_REG_KILL instructions on
+// the exits of any basic block that has critical edges going from it, or which
+// branch to a critical basic block.
+//
+// FIXME: this is not implemented yet. The stackifier pass only works on local
+// basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumFXCH, "Number of fxch instructions inserted");
+STATISTIC(NumFP , "Number of floating point instructions");
+
+namespace {
+ struct VISIBILITY_HIDDEN FPS : public MachineFunctionPass {
+ static char ID;
+ FPS() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const { return "X86 FP Stackifier"; }
+
+ private:
+ const TargetInstrInfo *TII; // Machine instruction info.
+ MachineBasicBlock *MBB; // Current basic block
+ unsigned Stack[8]; // FP<n> Registers in each stack slot...
+ unsigned RegMap[8]; // Track which stack slot contains each register
+ unsigned StackTop; // The current top of the FP stack.
+
+ void dumpStack() const {
+ cerr << "Stack contents:";
+ for (unsigned i = 0; i != StackTop; ++i) {
+ cerr << " FP" << Stack[i];
+ assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
+ }
+ cerr << "\n";
+ }
+ private:
+ /// isStackEmpty - Return true if the FP stack is empty.
+ bool isStackEmpty() const {
+ return StackTop == 0;
+ }
+
+ // getSlot - Return the stack slot number a particular register number is
+ // in.
+ unsigned getSlot(unsigned RegNo) const {
+ assert(RegNo < 8 && "Regno out of range!");
+ return RegMap[RegNo];
+ }
+
+ // getStackEntry - Return the X86::FP<n> register in register ST(i).
+ unsigned getStackEntry(unsigned STi) const {
+ assert(STi < StackTop && "Access past stack top!");
+ return Stack[StackTop-1-STi];
+ }
+
+ // getSTReg - Return the X86::ST(i) register which contains the specified
+ // FP<RegNo> register.
+ unsigned getSTReg(unsigned RegNo) const {
+ return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
+ }
+
+ // pushReg - Push the specified FP<n> register onto the stack.
+ void pushReg(unsigned Reg) {
+ assert(Reg < 8 && "Register number out of range!");
+ assert(StackTop < 8 && "Stack overflow!");
+ Stack[StackTop] = Reg;
+ RegMap[Reg] = StackTop++;
+ }
+
+ bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
+ void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+ if (isAtTop(RegNo)) return;
+
+ unsigned STReg = getSTReg(RegNo);
+ unsigned RegOnTop = getStackEntry(0);
+
+ // Swap the slots the regs are in.
+ std::swap(RegMap[RegNo], RegMap[RegOnTop]);
+
+ // Swap stack slot contents.
+ assert(RegMap[RegOnTop] < StackTop);
+ std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
+
+ // Emit an fxch to update the runtime processor's version of the state.
+ BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
+ NumFXCH++;
+ }
+
+ void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
+ DebugLoc dl = I->getDebugLoc();
+ unsigned STReg = getSTReg(RegNo);
+ pushReg(AsReg); // New register on top of stack
+
+ BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
+ }
+
+ // popStackAfter - Pop the current value off of the top of the FP stack
+ // after the specified instruction.
+ void popStackAfter(MachineBasicBlock::iterator &I);
+
+ // freeStackSlotAfter - Free the specified register from the register stack,
+ // so that it is no longer in a register. If the register is currently at
+ // the top of the stack, we just pop the current instruction, otherwise we
+ // store the current top-of-stack into the specified slot, then pop the top
+ // of stack.
+ void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
+
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ void handleZeroArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFP(MachineBasicBlock::iterator &I);
+ void handleOneArgFPRW(MachineBasicBlock::iterator &I);
+ void handleTwoArgFP(MachineBasicBlock::iterator &I);
+ void handleCompareFP(MachineBasicBlock::iterator &I);
+ void handleCondMovFP(MachineBasicBlock::iterator &I);
+ void handleSpecialFP(MachineBasicBlock::iterator &I);
+ };
+ char FPS::ID = 0;
+}
+
+FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
+
+/// getFPReg - Return the X86::FPx register number for the specified operand.
+/// For example, this returns 3 for X86::FP3.
+static unsigned getFPReg(const MachineOperand &MO) {
+ assert(MO.isReg() && "Expected an FP register!");
+ unsigned Reg = MO.getReg();
+ assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
+ return Reg - X86::FP0;
+}
+
+
+/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
+/// register references into FP stack references.
+///
+bool FPS::runOnMachineFunction(MachineFunction &MF) {
+ // We only need to run this pass if there are any FP registers used in this
+ // function. If it is all integer, there is nothing for us to do!
+ bool FPIsUsed = false;
+
+ assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
+ for (unsigned i = 0; i <= 6; ++i)
+ if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
+ FPIsUsed = true;
+ break;
+ }
+
+ // Early exit.
+ if (!FPIsUsed) return false;
+
+ TII = MF.getTarget().getInstrInfo();
+ StackTop = 0;
+
+ // Process the function in depth first order so that we process at least one
+ // of the predecessors for every reachable block in the function.
+ SmallPtrSet<MachineBasicBlock*, 8> Processed;
+ MachineBasicBlock *Entry = MF.begin();
+
+ bool Changed = false;
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
+ I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
+ I != E; ++I)
+ Changed |= processBasicBlock(MF, **I);
+
+ return Changed;
+}
+
+/// processBasicBlock - Loop over all of the instructions in the basic block,
+/// transforming FP instructions into their stack form.
+///
+bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
+ bool Changed = false;
+ MBB = &BB;
+
+ for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
+ MachineInstr *MI = I;
+ unsigned Flags = MI->getDesc().TSFlags;
+
+ unsigned FPInstClass = Flags & X86II::FPTypeMask;
+ if (MI->getOpcode() == TargetInstrInfo::INLINEASM)
+ FPInstClass = X86II::SpecialFP;
+
+ if (FPInstClass == X86II::NotFP)
+ continue; // Efficiently ignore non-fp insts!
+
+ MachineInstr *PrevMI = 0;
+ if (I != BB.begin())
+ PrevMI = prior(I);
+
+ ++NumFP; // Keep track of # of pseudo instrs
+ DOUT << "\nFPInst:\t" << *MI;
+
+ // Get dead variables list now because the MI pointer may be deleted as part
+ // of processing!
+ SmallVector<unsigned, 8> DeadRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDead())
+ DeadRegs.push_back(MO.getReg());
+ }
+
+ switch (FPInstClass) {
+ case X86II::ZeroArgFP: handleZeroArgFP(I); break;
+ case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0)
+ case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
+ case X86II::TwoArgFP: handleTwoArgFP(I); break;
+ case X86II::CompareFP: handleCompareFP(I); break;
+ case X86II::CondMovFP: handleCondMovFP(I); break;
+ case X86II::SpecialFP: handleSpecialFP(I); break;
+ default: assert(0 && "Unknown FP Type!");
+ }
+
+ // Check to see if any of the values defined by this instruction are dead
+ // after definition. If so, pop them.
+ for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
+ unsigned Reg = DeadRegs[i];
+ if (Reg >= X86::FP0 && Reg <= X86::FP6) {
+ DOUT << "Register FP#" << Reg-X86::FP0 << " is dead!\n";
+ freeStackSlotAfter(I, Reg-X86::FP0);
+ }
+ }
+
+ // Print out all of the instructions this expanded to, if -debug is enabled.
+ DEBUG(
+ MachineBasicBlock::iterator PrevI(PrevMI);
+ if (I == PrevI) {
+ cerr << "Just deleted pseudo instruction\n";
+ } else {
+ MachineBasicBlock::iterator Start = I;
+ // Rewind to first instruction newly inserted.
+ while (Start != BB.begin() && prior(Start) != PrevI) --Start;
+ cerr << "Inserted instructions:\n\t";
+ Start->print(*cerr.stream(), &MF.getTarget());
+ while (++Start != next(I)) {}
+ }
+ dumpStack();
+ );
+
+ Changed = true;
+ }
+
+ assert(isStackEmpty() && "Stack not empty at end of basic block?");
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Efficient Lookup Table Support
+//===----------------------------------------------------------------------===//
+
+namespace {
+ struct TableEntry {
+ unsigned from;
+ unsigned to;
+ bool operator<(const TableEntry &TE) const { return from < TE.from; }
+ friend bool operator<(const TableEntry &TE, unsigned V) {
+ return TE.from < V;
+ }
+ friend bool operator<(unsigned V, const TableEntry &TE) {
+ return V < TE.from;
+ }
+ };
+}
+
+#ifndef NDEBUG
+static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ if (!(Table[i] < Table[i+1])) return false;
+ return true;
+}
+#endif
+
+static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
+ const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
+ if (I != Table+N && I->from == Opcode)
+ return I->to;
+ return -1;
+}
+
+#ifdef NDEBUG
+#define ASSERT_SORTED(TABLE)
+#else
+#define ASSERT_SORTED(TABLE) \
+ { static bool TABLE##Checked = false; \
+ if (!TABLE##Checked) { \
+ assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \
+ "All lookup tables must be sorted for efficient access!"); \
+ TABLE##Checked = true; \
+ } \
+ }
+#endif
+
+//===----------------------------------------------------------------------===//
+// Register File -> Register Stack Mapping Methods
+//===----------------------------------------------------------------------===//
+
+// OpcodeTable - Sorted map of register instructions to their stack version.
+// The first element is a register file pseudo instruction, the second is the
+// concrete X86 instruction which uses the register stack.
+//
+static const TableEntry OpcodeTable[] = {
+ { X86::ABS_Fp32 , X86::ABS_F },
+ { X86::ABS_Fp64 , X86::ABS_F },
+ { X86::ABS_Fp80 , X86::ABS_F },
+ { X86::ADD_Fp32m , X86::ADD_F32m },
+ { X86::ADD_Fp64m , X86::ADD_F64m },
+ { X86::ADD_Fp64m32 , X86::ADD_F32m },
+ { X86::ADD_Fp80m32 , X86::ADD_F32m },
+ { X86::ADD_Fp80m64 , X86::ADD_F64m },
+ { X86::ADD_FpI16m32 , X86::ADD_FI16m },
+ { X86::ADD_FpI16m64 , X86::ADD_FI16m },
+ { X86::ADD_FpI16m80 , X86::ADD_FI16m },
+ { X86::ADD_FpI32m32 , X86::ADD_FI32m },
+ { X86::ADD_FpI32m64 , X86::ADD_FI32m },
+ { X86::ADD_FpI32m80 , X86::ADD_FI32m },
+ { X86::CHS_Fp32 , X86::CHS_F },
+ { X86::CHS_Fp64 , X86::CHS_F },
+ { X86::CHS_Fp80 , X86::CHS_F },
+ { X86::CMOVBE_Fp32 , X86::CMOVBE_F },
+ { X86::CMOVBE_Fp64 , X86::CMOVBE_F },
+ { X86::CMOVBE_Fp80 , X86::CMOVBE_F },
+ { X86::CMOVB_Fp32 , X86::CMOVB_F },
+ { X86::CMOVB_Fp64 , X86::CMOVB_F },
+ { X86::CMOVB_Fp80 , X86::CMOVB_F },
+ { X86::CMOVE_Fp32 , X86::CMOVE_F },
+ { X86::CMOVE_Fp64 , X86::CMOVE_F },
+ { X86::CMOVE_Fp80 , X86::CMOVE_F },
+ { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
+ { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
+ { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
+ { X86::CMOVNB_Fp32 , X86::CMOVNB_F },
+ { X86::CMOVNB_Fp64 , X86::CMOVNB_F },
+ { X86::CMOVNB_Fp80 , X86::CMOVNB_F },
+ { X86::CMOVNE_Fp32 , X86::CMOVNE_F },
+ { X86::CMOVNE_Fp64 , X86::CMOVNE_F },
+ { X86::CMOVNE_Fp80 , X86::CMOVNE_F },
+ { X86::CMOVNP_Fp32 , X86::CMOVNP_F },
+ { X86::CMOVNP_Fp64 , X86::CMOVNP_F },
+ { X86::CMOVNP_Fp80 , X86::CMOVNP_F },
+ { X86::CMOVP_Fp32 , X86::CMOVP_F },
+ { X86::CMOVP_Fp64 , X86::CMOVP_F },
+ { X86::CMOVP_Fp80 , X86::CMOVP_F },
+ { X86::COS_Fp32 , X86::COS_F },
+ { X86::COS_Fp64 , X86::COS_F },
+ { X86::COS_Fp80 , X86::COS_F },
+ { X86::DIVR_Fp32m , X86::DIVR_F32m },
+ { X86::DIVR_Fp64m , X86::DIVR_F64m },
+ { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
+ { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
+ { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
+ { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
+ { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
+ { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
+ { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
+ { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
+ { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
+ { X86::DIV_Fp32m , X86::DIV_F32m },
+ { X86::DIV_Fp64m , X86::DIV_F64m },
+ { X86::DIV_Fp64m32 , X86::DIV_F32m },
+ { X86::DIV_Fp80m32 , X86::DIV_F32m },
+ { X86::DIV_Fp80m64 , X86::DIV_F64m },
+ { X86::DIV_FpI16m32 , X86::DIV_FI16m },
+ { X86::DIV_FpI16m64 , X86::DIV_FI16m },
+ { X86::DIV_FpI16m80 , X86::DIV_FI16m },
+ { X86::DIV_FpI32m32 , X86::DIV_FI32m },
+ { X86::DIV_FpI32m64 , X86::DIV_FI32m },
+ { X86::DIV_FpI32m80 , X86::DIV_FI32m },
+ { X86::ILD_Fp16m32 , X86::ILD_F16m },
+ { X86::ILD_Fp16m64 , X86::ILD_F16m },
+ { X86::ILD_Fp16m80 , X86::ILD_F16m },
+ { X86::ILD_Fp32m32 , X86::ILD_F32m },
+ { X86::ILD_Fp32m64 , X86::ILD_F32m },
+ { X86::ILD_Fp32m80 , X86::ILD_F32m },
+ { X86::ILD_Fp64m32 , X86::ILD_F64m },
+ { X86::ILD_Fp64m64 , X86::ILD_F64m },
+ { X86::ILD_Fp64m80 , X86::ILD_F64m },
+ { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
+ { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
+ { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
+ { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
+ { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
+ { X86::IST_Fp16m32 , X86::IST_F16m },
+ { X86::IST_Fp16m64 , X86::IST_F16m },
+ { X86::IST_Fp16m80 , X86::IST_F16m },
+ { X86::IST_Fp32m32 , X86::IST_F32m },
+ { X86::IST_Fp32m64 , X86::IST_F32m },
+ { X86::IST_Fp32m80 , X86::IST_F32m },
+ { X86::IST_Fp64m32 , X86::IST_FP64m },
+ { X86::IST_Fp64m64 , X86::IST_FP64m },
+ { X86::IST_Fp64m80 , X86::IST_FP64m },
+ { X86::LD_Fp032 , X86::LD_F0 },
+ { X86::LD_Fp064 , X86::LD_F0 },
+ { X86::LD_Fp080 , X86::LD_F0 },
+ { X86::LD_Fp132 , X86::LD_F1 },
+ { X86::LD_Fp164 , X86::LD_F1 },
+ { X86::LD_Fp180 , X86::LD_F1 },
+ { X86::LD_Fp32m , X86::LD_F32m },
+ { X86::LD_Fp32m64 , X86::LD_F32m },
+ { X86::LD_Fp32m80 , X86::LD_F32m },
+ { X86::LD_Fp64m , X86::LD_F64m },
+ { X86::LD_Fp64m80 , X86::LD_F64m },
+ { X86::LD_Fp80m , X86::LD_F80m },
+ { X86::MUL_Fp32m , X86::MUL_F32m },
+ { X86::MUL_Fp64m , X86::MUL_F64m },
+ { X86::MUL_Fp64m32 , X86::MUL_F32m },
+ { X86::MUL_Fp80m32 , X86::MUL_F32m },
+ { X86::MUL_Fp80m64 , X86::MUL_F64m },
+ { X86::MUL_FpI16m32 , X86::MUL_FI16m },
+ { X86::MUL_FpI16m64 , X86::MUL_FI16m },
+ { X86::MUL_FpI16m80 , X86::MUL_FI16m },
+ { X86::MUL_FpI32m32 , X86::MUL_FI32m },
+ { X86::MUL_FpI32m64 , X86::MUL_FI32m },
+ { X86::MUL_FpI32m80 , X86::MUL_FI32m },
+ { X86::SIN_Fp32 , X86::SIN_F },
+ { X86::SIN_Fp64 , X86::SIN_F },
+ { X86::SIN_Fp80 , X86::SIN_F },
+ { X86::SQRT_Fp32 , X86::SQRT_F },
+ { X86::SQRT_Fp64 , X86::SQRT_F },
+ { X86::SQRT_Fp80 , X86::SQRT_F },
+ { X86::ST_Fp32m , X86::ST_F32m },
+ { X86::ST_Fp64m , X86::ST_F64m },
+ { X86::ST_Fp64m32 , X86::ST_F32m },
+ { X86::ST_Fp80m32 , X86::ST_F32m },
+ { X86::ST_Fp80m64 , X86::ST_F64m },
+ { X86::ST_FpP80m , X86::ST_FP80m },
+ { X86::SUBR_Fp32m , X86::SUBR_F32m },
+ { X86::SUBR_Fp64m , X86::SUBR_F64m },
+ { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
+ { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
+ { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
+ { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
+ { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
+ { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
+ { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
+ { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
+ { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
+ { X86::SUB_Fp32m , X86::SUB_F32m },
+ { X86::SUB_Fp64m , X86::SUB_F64m },
+ { X86::SUB_Fp64m32 , X86::SUB_F32m },
+ { X86::SUB_Fp80m32 , X86::SUB_F32m },
+ { X86::SUB_Fp80m64 , X86::SUB_F64m },
+ { X86::SUB_FpI16m32 , X86::SUB_FI16m },
+ { X86::SUB_FpI16m64 , X86::SUB_FI16m },
+ { X86::SUB_FpI16m80 , X86::SUB_FI16m },
+ { X86::SUB_FpI32m32 , X86::SUB_FI32m },
+ { X86::SUB_FpI32m64 , X86::SUB_FI32m },
+ { X86::SUB_FpI32m80 , X86::SUB_FI32m },
+ { X86::TST_Fp32 , X86::TST_F },
+ { X86::TST_Fp64 , X86::TST_F },
+ { X86::TST_Fp80 , X86::TST_F },
+ { X86::UCOM_FpIr32 , X86::UCOM_FIr },
+ { X86::UCOM_FpIr64 , X86::UCOM_FIr },
+ { X86::UCOM_FpIr80 , X86::UCOM_FIr },
+ { X86::UCOM_Fpr32 , X86::UCOM_Fr },
+ { X86::UCOM_Fpr64 , X86::UCOM_Fr },
+ { X86::UCOM_Fpr80 , X86::UCOM_Fr },
+};
+
+static unsigned getConcreteOpcode(unsigned Opcode) {
+ ASSERT_SORTED(OpcodeTable);
+ int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
+ assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
+ return Opc;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Methods
+//===----------------------------------------------------------------------===//
+
+// PopTable - Sorted map of instructions to their popping version. The first
+// element is an instruction, the second is the version which pops.
+//
+static const TableEntry PopTable[] = {
+ { X86::ADD_FrST0 , X86::ADD_FPrST0 },
+
+ { X86::DIVR_FrST0, X86::DIVR_FPrST0 },
+ { X86::DIV_FrST0 , X86::DIV_FPrST0 },
+
+ { X86::IST_F16m , X86::IST_FP16m },
+ { X86::IST_F32m , X86::IST_FP32m },
+
+ { X86::MUL_FrST0 , X86::MUL_FPrST0 },
+
+ { X86::ST_F32m , X86::ST_FP32m },
+ { X86::ST_F64m , X86::ST_FP64m },
+ { X86::ST_Frr , X86::ST_FPrr },
+
+ { X86::SUBR_FrST0, X86::SUBR_FPrST0 },
+ { X86::SUB_FrST0 , X86::SUB_FPrST0 },
+
+ { X86::UCOM_FIr , X86::UCOM_FIPr },
+
+ { X86::UCOM_FPr , X86::UCOM_FPPr },
+ { X86::UCOM_Fr , X86::UCOM_FPr },
+};
+
+/// popStackAfter - Pop the current value off of the top of the FP stack after
+/// the specified instruction. This attempts to be sneaky and combine the pop
+/// into the instruction itself if possible. The iterator is left pointing to
+/// the last instruction, be it a new pop instruction inserted, or the old
+/// instruction if it was modified in place.
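+/// For example, UCOM_FIr is rewritten in place to the popping UCOM_FIPr; if
+/// no popping form exists in PopTable, an explicit "ST_FPrr ST(0)" (fstp
+/// st(0)) is inserted after the instruction instead.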
+///
+void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
+ MachineInstr* MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+ ASSERT_SORTED(PopTable);
+ assert(StackTop > 0 && "Cannot pop empty stack!");
+ RegMap[Stack[--StackTop]] = ~0; // Update state
+
+ // Check to see if there is a popping version of this instruction...
+ int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
+ if (Opcode != -1) {
+ I->setDesc(TII->get(Opcode));
+ if (Opcode == X86::UCOM_FPPr)
+ I->RemoveOperand(0);
+ } else { // Insert an explicit pop
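+    // ST_FPrr with an ST0 operand is "fstp %st(0)": it stores the top of
+    // stack into itself and then pops, which simply discards the top value.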
+ I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
+ }
+}
+
+/// freeStackSlotAfter - Free the specified register from the register stack, so
+/// that it is no longer in a register. If the register is currently at the top
+/// of the stack, we just pop it; otherwise we store the current top-of-stack
+/// into the specified register's slot, then pop the top of stack.
+void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
+ if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy.
+ popStackAfter(I);
+ return;
+ }
+
+ // Otherwise, store the top of stack into the dead slot, killing the operand
+ // without having to add in an explicit xchg then pop.
+ //
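+  // For example, with the logical stack (top first) [FP1, FP5, FP2] and FP2
+  // being freed, we emit "fstp %st(2)": FP1's value overwrites FP2's physical
+  // slot and the old top is popped, leaving [FP5, FP1].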
+ unsigned STReg = getSTReg(FPRegNo);
+ unsigned OldSlot = getSlot(FPRegNo);
+ unsigned TopReg = Stack[StackTop-1];
+ Stack[OldSlot] = TopReg;
+ RegMap[TopReg] = OldSlot;
+ RegMap[FPRegNo] = ~0;
+ Stack[--StackTop] = ~0;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+ I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(STReg);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction transformation implementation
+//===----------------------------------------------------------------------===//
+
+/// handleZeroArgFP - ST(0) = fld0   or   ST(0) = flds <mem>
+///
+void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned DestReg = getFPReg(MI->getOperand(0));
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(0); // Remove the explicit ST(0) operand
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // Result gets pushed on the stack.
+ pushReg(DestReg);
+}
+
+/// handleOneArgFP - fst <mem>, ST(0)
+///
+void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ assert((NumOps == X86AddrNumOperands + 1 || NumOps == 1) &&
+ "Can only handle fst* & ftst instructions!");
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
+ bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+
+  // FISTP64m is strange because there isn't a non-popping version.
+  // If we have one _and_ we don't want to pop the operand, duplicate the value
+  // on the stack instead of moving it. This ensures that popping the value is
+  // always ok.
+ // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
+ //
+ if (!KillsSrc &&
+ (MI->getOpcode() == X86::IST_Fp64m32 ||
+ MI->getOpcode() == X86::ISTT_Fp16m32 ||
+ MI->getOpcode() == X86::ISTT_Fp32m32 ||
+ MI->getOpcode() == X86::ISTT_Fp64m32 ||
+ MI->getOpcode() == X86::IST_Fp64m64 ||
+ MI->getOpcode() == X86::ISTT_Fp16m64 ||
+ MI->getOpcode() == X86::ISTT_Fp32m64 ||
+ MI->getOpcode() == X86::ISTT_Fp64m64 ||
+ MI->getOpcode() == X86::IST_Fp64m80 ||
+ MI->getOpcode() == X86::ISTT_Fp16m80 ||
+ MI->getOpcode() == X86::ISTT_Fp32m80 ||
+ MI->getOpcode() == X86::ISTT_Fp64m80 ||
+ MI->getOpcode() == X86::ST_FpP80m)) {
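+    // Push a copy of the value, owned by the scratch register number 7 (which
+    // no real FP operand uses), so that the popping store consumes the copy
+    // and the original value stays live in its slot.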
+ duplicateToTop(Reg, 7 /*temp register*/, I);
+ } else {
+ moveToTop(Reg, I); // Move to the top of the stack...
+ }
+
+ // Convert from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ if (MI->getOpcode() == X86::IST_FP64m ||
+ MI->getOpcode() == X86::ISTT_FP16m ||
+ MI->getOpcode() == X86::ISTT_FP32m ||
+ MI->getOpcode() == X86::ISTT_FP64m ||
+ MI->getOpcode() == X86::ST_FP80m) {
+ assert(StackTop > 0 && "Stack empty??");
+ --StackTop;
+ } else if (KillsSrc) { // Last use of operand?
+ popStackAfter(I);
+ }
+}
+
+
+/// handleOneArgFPRW: Handle instructions that read from the top of stack and
+/// replace the value with a newly computed value. These instructions may have
+/// non-fp operands after their FP operands.
+///
+/// Examples:
+/// R1 = fchs R2
+/// R1 = fadd R2, [mem]
+///
+void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+#ifndef NDEBUG
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
+#endif
+
+ // Is this the last use of the source register?
+ unsigned Reg = getFPReg(MI->getOperand(1));
+ bool KillsSrc = MI->killsRegister(X86::FP0+Reg);
+
+ if (KillsSrc) {
+ // If this is the last use of the source register, just make sure it's on
+ // the top of the stack.
+ moveToTop(Reg, I);
+ assert(StackTop > 0 && "Stack cannot be empty!");
+ --StackTop;
+ pushReg(getFPReg(MI->getOperand(0)));
+ } else {
+ // If this is not the last use of the source register, _copy_ it to the top
+ // of the stack.
+ duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
+ }
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(1); // Drop the source operand.
+ MI->RemoveOperand(0); // Drop the destination operand.
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define tables of various ways to map pseudo instructions
+//
+
+// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
+static const TableEntry ForwardST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r },
+ { X86::ADD_Fp64 , X86::ADD_FST0r },
+ { X86::ADD_Fp80 , X86::ADD_FST0r },
+ { X86::DIV_Fp32 , X86::DIV_FST0r },
+ { X86::DIV_Fp64 , X86::DIV_FST0r },
+ { X86::DIV_Fp80 , X86::DIV_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r },
+ { X86::MUL_Fp64 , X86::MUL_FST0r },
+ { X86::MUL_Fp80 , X86::MUL_FST0r },
+ { X86::SUB_Fp32 , X86::SUB_FST0r },
+ { X86::SUB_Fp64 , X86::SUB_FST0r },
+ { X86::SUB_Fp80 , X86::SUB_FST0r },
+};
+
+// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
+static const TableEntry ReverseST0Table[] = {
+ { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative
+ { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FST0r },
+ { X86::DIV_Fp64 , X86::DIVR_FST0r },
+ { X86::DIV_Fp80 , X86::DIVR_FST0r },
+ { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative
+ { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FST0r },
+ { X86::SUB_Fp64 , X86::SUBR_FST0r },
+ { X86::SUB_Fp80 , X86::SUBR_FST0r },
+};
+
+// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
+static const TableEntry ForwardSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative
+ { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative
+ { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative
+ { X86::DIV_Fp32 , X86::DIVR_FrST0 },
+ { X86::DIV_Fp64 , X86::DIVR_FrST0 },
+ { X86::DIV_Fp80 , X86::DIVR_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative
+ { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative
+ { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative
+ { X86::SUB_Fp32 , X86::SUBR_FrST0 },
+ { X86::SUB_Fp64 , X86::SUBR_FrST0 },
+ { X86::SUB_Fp80 , X86::SUBR_FrST0 },
+};
+
+// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
+static const TableEntry ReverseSTiTable[] = {
+ { X86::ADD_Fp32 , X86::ADD_FrST0 },
+ { X86::ADD_Fp64 , X86::ADD_FrST0 },
+ { X86::ADD_Fp80 , X86::ADD_FrST0 },
+ { X86::DIV_Fp32 , X86::DIV_FrST0 },
+ { X86::DIV_Fp64 , X86::DIV_FrST0 },
+ { X86::DIV_Fp80 , X86::DIV_FrST0 },
+ { X86::MUL_Fp32 , X86::MUL_FrST0 },
+ { X86::MUL_Fp64 , X86::MUL_FrST0 },
+ { X86::MUL_Fp80 , X86::MUL_FrST0 },
+ { X86::SUB_Fp32 , X86::SUB_FrST0 },
+ { X86::SUB_Fp64 , X86::SUB_FrST0 },
+ { X86::SUB_Fp80 , X86::SUB_FrST0 },
+};
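+
+// handleTwoArgFP below picks one of these tables based on whether the top of
+// stack holds the first or the second source operand (forward vs. reverse
+// form) and on which dead operand the result overwrites: the ST(0) forms
+// overwrite the top of stack, and the ST(i) forms overwrite the other
+// operand's slot.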
+
+
+/// handleTwoArgFP - Handle instructions like FADD and friends, which are
+/// virtual instructions that need to be simplified and possibly transformed.
+///
+/// Result: ST(0) = fsub ST(0), ST(i)
+/// ST(i) = fsub ST(0), ST(i)
+/// ST(0) = fsubr ST(0), ST(i)
+/// ST(i) = fsubr ST(0), ST(i)
+///
+void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ assert(NumOperands == 3 && "Illegal TwoArgFP instruction!");
+ unsigned Dest = getFPReg(MI->getOperand(0));
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned TOS = getStackEntry(0);
+
+ // One of our operands must be on the top of the stack. If neither is yet, we
+ // need to move one.
+ if (Op0 != TOS && Op1 != TOS) { // No operand at TOS?
+ // We can choose to move either operand to the top of the stack. If one of
+ // the operands is killed by this instruction, we want that one so that we
+ // can update right on top of the old version.
+ if (KillsOp0) {
+ moveToTop(Op0, I); // Move dead operand to TOS.
+ TOS = Op0;
+ } else if (KillsOp1) {
+ moveToTop(Op1, I);
+ TOS = Op1;
+ } else {
+ // All of the operands are live after this instruction executes, so we
+ // cannot update on top of any operand. Because of this, we must
+ // duplicate one of the stack elements to the top. It doesn't matter
+ // which one we pick.
+ //
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+ } else if (!KillsOp0 && !KillsOp1) {
+ // If we DO have one of our operands at the top of the stack, but we don't
+ // have a dead operand, we must duplicate one of the operands to a new slot
+ // on the stack.
+ duplicateToTop(Op0, Dest, I);
+ Op0 = TOS = Dest;
+ KillsOp0 = true;
+ }
+
+ // Now we know that one of our operands is on the top of the stack, and at
+ // least one of our operands is killed by this instruction.
+ assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) &&
+ "Stack conditions not set up right!");
+
+ // We decide which form to use based on what is on the top of the stack, and
+ // which operand is killed by this instruction.
+ const TableEntry *InstTable;
+ bool isForward = TOS == Op0;
+ bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
+ if (updateST0) {
+ if (isForward)
+ InstTable = ForwardST0Table;
+ else
+ InstTable = ReverseST0Table;
+ } else {
+ if (isForward)
+ InstTable = ForwardSTiTable;
+ else
+ InstTable = ReverseSTiTable;
+ }
+
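+  // All four tables have the same number of entries, so it is safe to use
+  // ForwardST0Table's length for whichever table was selected.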
+ int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
+ MI->getOpcode());
+ assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
+
+ // NotTOS - The register which is not on the top of stack...
+ unsigned NotTOS = (TOS == Op0) ? Op1 : Op0;
+
+ // Replace the old instruction with a new instruction
+ MBB->remove(I++);
+ I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
+
+ // If both operands are killed, pop one off of the stack in addition to
+ // overwriting the other one.
+ if (KillsOp0 && KillsOp1 && Op0 != Op1) {
+ assert(!updateST0 && "Should have updated other operand!");
+ popStackAfter(I); // Pop the top of stack
+ }
+
+ // Update stack information so that we know the destination register is now on
+ // the stack.
+ unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS);
+ assert(UpdatedSlot < StackTop && Dest < 7);
+ Stack[UpdatedSlot] = Dest;
+ RegMap[Dest] = UpdatedSlot;
+ MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction
+}
+
+/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
+/// register arguments and no explicit destinations.
+///
+void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
+ ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
+ ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
+ MachineInstr *MI = I;
+
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ assert(NumOperands == 2 && "Illegal FUCOM* instruction!");
+ unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2));
+ unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1));
+ bool KillsOp0 = MI->killsRegister(X86::FP0+Op0);
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+
+  // Make sure the first operand is on the top of the stack; the other one can
+  // be anywhere.
+ moveToTop(Op0, I);
+
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->RemoveOperand(1);
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If any of the operands are killed by this instruction, free them.
+ if (KillsOp0) freeStackSlotAfter(I, Op0);
+ if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1);
+}
+
+/// handleCondMovFP - Handle two address conditional move instructions. These
+/// instructions move a st(i) register to st(0) iff a condition is true. These
+/// instructions require that the first operand is at the top of the stack, but
+/// otherwise don't modify the stack at all.
+void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+
+ unsigned Op0 = getFPReg(MI->getOperand(0));
+ unsigned Op1 = getFPReg(MI->getOperand(2));
+ bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);
+
+ // The first operand *must* be on the top of the stack.
+ moveToTop(Op0, I);
+
+ // Change the second operand to the stack register that the operand is in.
+ // Change from the pseudo instruction to the concrete instruction.
+ MI->RemoveOperand(0);
+ MI->RemoveOperand(1);
+ MI->getOperand(0).setReg(getSTReg(Op1));
+ MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
+
+ // If we kill the second operand, make sure to pop it from the stack.
+ if (Op0 != Op1 && KillsOp1) {
+ // Get this value off of the register stack.
+ freeStackSlotAfter(I, Op1);
+ }
+}
+
+
+/// handleSpecialFP - Handle special instructions which behave unlike other
+/// floating point instructions. This is primarily intended for use by pseudo
+/// instructions.
+///
+void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+ switch (MI->getOpcode()) {
+ default: assert(0 && "Unknown SpecialFP instruction!");
+ case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
+ case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type!
+ case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
+ assert(StackTop == 0 && "Stack should be empty after a call!");
+ pushReg(getFPReg(MI->getOperand(0)));
+ break;
+ case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type!
+ case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type!
+ case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type!
+ // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm.
+ // The pattern we expect is:
+ // CALL
+ // FP1 = FpGET_ST0
+ // FP4 = FpGET_ST1
+ //
+ // At this point, we've pushed FP1 on the top of stack, so it should be
+ // present if it isn't dead. If it was dead, we already emitted a pop to
+ // remove it from the stack and StackTop = 0.
+
+ // Push FP4 as top of stack next.
+ pushReg(getFPReg(MI->getOperand(0)));
+
+ // If StackTop was 0 before we pushed our operand, then ST(0) must have been
+ // dead. In this case, the ST(1) value is the only thing that is live, so
+ // it should be on the TOS (after the pop that was emitted) and is. Just
+ // continue in this case.
+ if (StackTop == 1)
+ break;
+
+ // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top
+ // elements so that our accounting is correct.
+ unsigned RegOnTop = getStackEntry(0);
+ unsigned RegNo = getStackEntry(1);
+
+ // Swap the slots the regs are in.
+ std::swap(RegMap[RegNo], RegMap[RegOnTop]);
+
+ // Swap stack slot contents.
+ assert(RegMap[RegOnTop] < StackTop);
+ std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
+ break;
+ }
+ case X86::FpSET_ST0_32:
+ case X86::FpSET_ST0_64:
+ case X86::FpSET_ST0_80:
+    assert((StackTop == 1 || StackTop == 2)
+           && "Stack should have one or two elements on it to return!");
+ --StackTop; // "Forget" we have something on the top of stack!
+ break;
+ case X86::FpSET_ST1_32:
+ case X86::FpSET_ST1_64:
+ case X86::FpSET_ST1_80:
+ // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them.
+ if (StackTop == 1) {
+ BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
+ NumFXCH++;
+ StackTop = 0;
+ break;
+ }
+    assert(StackTop == 2 && "Stack should have two elements on it to return!");
+ --StackTop; // "Forget" we have something on the top of stack!
+ break;
+ case X86::MOV_Fp3232:
+ case X86::MOV_Fp3264:
+ case X86::MOV_Fp6432:
+ case X86::MOV_Fp6464:
+ case X86::MOV_Fp3280:
+ case X86::MOV_Fp6480:
+ case X86::MOV_Fp8032:
+ case X86::MOV_Fp8064:
+ case X86::MOV_Fp8080: {
+ const MachineOperand &MO1 = MI->getOperand(1);
+ unsigned SrcReg = getFPReg(MO1);
+
+ const MachineOperand &MO0 = MI->getOperand(0);
+ // These can be created due to inline asm. Two address pass can introduce
+ // copies from RFP registers to virtual registers.
+ if (MO0.getReg() == X86::ST0 && SrcReg == 0) {
+ assert(MO1.isKill());
+ // Treat %ST0<def> = MOV_Fp8080 %FP0<kill>
+ // like FpSET_ST0_80 %FP0<kill>, %ST0<imp-def>
+        assert((StackTop == 1 || StackTop == 2)
+               && "Stack should have one or two elements on it to return!");
+ --StackTop; // "Forget" we have something on the top of stack!
+ break;
+ } else if (MO0.getReg() == X86::ST1 && SrcReg == 1) {
+ assert(MO1.isKill());
+ // Treat %ST1<def> = MOV_Fp8080 %FP1<kill>
+ // like FpSET_ST1_80 %FP0<kill>, %ST1<imp-def>
+ // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them.
+ if (StackTop == 1) {
+ BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
+ NumFXCH++;
+ StackTop = 0;
+ break;
+ }
+      assert(StackTop == 2 && "Stack should have two elements on it to return!");
+ --StackTop; // "Forget" we have something on the top of stack!
+ break;
+ }
+
+ unsigned DestReg = getFPReg(MO0);
+ if (MI->killsRegister(X86::FP0+SrcReg)) {
+ // If the input operand is killed, we can just change the owner of the
+ // incoming stack slot into the result.
+ unsigned Slot = getSlot(SrcReg);
+ assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!");
+ Stack[Slot] = DestReg;
+ RegMap[DestReg] = Slot;
+
+ } else {
+ // For FMOV we just duplicate the specified value to a new stack slot.
+ // This could be made better, but would require substantial changes.
+ duplicateToTop(SrcReg, DestReg, I);
+ }
+ }
+ break;
+ case TargetInstrInfo::INLINEASM: {
+ // The inline asm MachineInstr currently only *uses* FP registers for the
+ // 'f' constraint. These should be turned into the current ST(x) register
+ // in the machine instr. Also, any kills should be explicitly popped after
+ // the inline asm.
+ unsigned Kills[7];
+ unsigned NumKills = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ assert(Op.isUse() && "Only handle inline asm uses right now");
+
+ unsigned FPReg = getFPReg(Op);
+ Op.setReg(getSTReg(FPReg));
+
+ // If we kill this operand, make sure to pop it from the stack after the
+ // asm. We just remember it for now, and pop them all off at the end in
+ // a batch.
+ if (Op.isKill())
+ Kills[NumKills++] = FPReg;
+ }
+
+ // If this asm kills any FP registers (is the last use of them) we must
+ // explicitly emit pop instructions for them. Do this now after the asm has
+ // executed so that the ST(x) numbers are not off (which would happen if we
+ // did this inline with operand rewriting).
+ //
+ // Note: this might be a non-optimal pop sequence. We might be able to do
+ // better by trying to pop in stack order or something.
+ MachineBasicBlock::iterator InsertPt = MI;
+ while (NumKills)
+ freeStackSlotAfter(InsertPt, Kills[--NumKills]);
+
+ // Don't delete the inline asm!
+ return;
+ }
+
+ case X86::RET:
+ case X86::RETI:
+ // If RET has an FP register use operand, pass the first one in ST(0) and
+ // the second one in ST(1).
+ if (isStackEmpty()) return; // Quick check to see if any are possible.
+
+ // Find the register operands.
+ unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ // FP Register uses must be kills unless there are two uses of the same
+ // register, in which case only one will be a kill.
+ assert(Op.isUse() &&
+ (Op.isKill() || // Marked kill.
+ getFPReg(Op) == FirstFPRegOp || // Second instance.
+ MI->killsRegister(Op.getReg())) && // Later use is marked kill.
+ "Ret only defs operands, and values aren't live beyond it");
+
+ if (FirstFPRegOp == ~0U)
+ FirstFPRegOp = getFPReg(Op);
+ else {
+ assert(SecondFPRegOp == ~0U && "More than two fp operands!");
+ SecondFPRegOp = getFPReg(Op);
+ }
+
+ // Remove the operand so that later passes don't see it.
+ MI->RemoveOperand(i);
+ --i, --e;
+ }
+
+ // There are only four possibilities here:
+ // 1) we are returning a single FP value. In this case, it has to be in
+ // ST(0) already, so just declare success by removing the value from the
+ // FP Stack.
+ if (SecondFPRegOp == ~0U) {
+ // Assert that the top of stack contains the right FP register.
+ assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
+ "Top of stack not the right register for RET!");
+
+ // Ok, everything is good, mark the value as not being on the stack
+ // anymore so that our assertion about the stack being empty at end of
+ // block doesn't fire.
+ StackTop = 0;
+ return;
+ }
+
+ // Otherwise, we are returning two values:
+ // 2) If returning the same value for both, we only have one thing in the FP
+ // stack. Consider: RET FP1, FP1
+ if (StackTop == 1) {
+ assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&&
+ "Stack misconfiguration for RET!");
+
+ // Duplicate the TOS so that we return it twice. Just pick some other FPx
+ // register to hold it.
+ unsigned NewReg = (FirstFPRegOp+1)%7;
+ duplicateToTop(FirstFPRegOp, NewReg, MI);
+ FirstFPRegOp = NewReg;
+ }
+
+ /// Okay we know we have two different FPx operands now:
+ assert(StackTop == 2 && "Must have two values live!");
+
+    /// 3) SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
+    /// in ST(1). In this case, emit an fxch.
+ if (getStackEntry(0) == SecondFPRegOp) {
+ assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
+ moveToTop(FirstFPRegOp, MI);
+ }
+
+ /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
+ /// ST(1). Just remove both from our understanding of the stack and return.
+ assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
+ assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
+ StackTop = 0;
+ return;
+ }
+
+ I = MBB->erase(I); // Remove the pseudo instruction
+ --I;
+}
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
new file mode 100644
index 000000000000..009846e2e0b5
--- /dev/null
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -0,0 +1,139 @@
+//===-- X86FloatingPointRegKill.cpp - FP_REG_KILL inserter ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which inserts FP_REG_KILL instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-codegen"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumFPKill, "Number of FP_REG_KILL instructions added");
+
+namespace {
+ struct VISIBILITY_HIDDEN FPRegKiller : public MachineFunctionPass {
+ static char ID;
+ FPRegKiller() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const { return "X86 FP_REG_KILL inserter"; }
+ };
+ char FPRegKiller::ID = 0;
+}
+
+FunctionPass *llvm::createX87FPRegKillInserterPass() { return new FPRegKiller(); }
+
+bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
+  // If we are emitting FP stack code, scan each basic block to determine if it
+  // defines any FP values.  If so, put an FP_REG_KILL instruction before the
+  // terminator of that block.
+
+ // Note that FP stack instructions are used in all modes for long double,
+ // so we always need to do this check.
+ // Also note that it's possible for an FP stack register to be live across
+ // an instruction that produces multiple basic blocks (SSE CMOV) so we
+ // must check all the generated basic blocks.
+
+ // Scan all of the machine instructions in these MBBs, checking for FP
+ // stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
+
+ // Fast-path: If nothing is using the x87 registers, we don't need to do
+ // any scanning.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
+ MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
+ MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
+ return false;
+
+ bool Changed = false;
+ const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>();
+ MachineFunction::iterator MBBI = MF.begin();
+ MachineFunction::iterator EndMBB = MF.end();
+ for (; MBBI != EndMBB; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+
+ // If this block returns, ignore it. We don't want to insert an FP_REG_KILL
+ // before the return.
+ if (!MBB->empty()) {
+ MachineBasicBlock::iterator EndI = MBB->end();
+ --EndI;
+ if (EndI->getDesc().isReturn())
+ continue;
+ }
+
+ bool ContainsFPCode = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ !ContainsFPCode && I != E; ++I) {
+ if (I->getNumOperands() != 0 && I->getOperand(0).isReg()) {
+ const TargetRegisterClass *clas;
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ if (I->getOperand(op).isReg() && I->getOperand(op).isDef() &&
+ TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
+ ((clas = MRI.getRegClass(I->getOperand(op).getReg())) ==
+ X86::RFP32RegisterClass ||
+ clas == X86::RFP64RegisterClass ||
+ clas == X86::RFP80RegisterClass)) {
+ ContainsFPCode = true;
+ break;
+ }
+ }
+ }
+ }
+ // Check PHI nodes in successor blocks. These PHI's will be lowered to have
+ // a copy of the input value in this block. In SSE mode, we only care about
+ // 80-bit values.
+ if (!ContainsFPCode) {
+ // Final check, check LLVM BB's that are successors to the LLVM BB
+ // corresponding to BB for FP PHI nodes.
+ const BasicBlock *LLVMBB = MBB->getBasicBlock();
+ const PHINode *PN;
+ for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
+ !ContainsFPCode && SI != E; ++SI) {
+ for (BasicBlock::const_iterator II = SI->begin();
+ (PN = dyn_cast<PHINode>(II)); ++II) {
+ if (PN->getType()==Type::X86_FP80Ty ||
+ (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) ||
+ (!Subtarget.hasSSE2() && PN->getType()==Type::DoubleTy)) {
+ ContainsFPCode = true;
+ break;
+ }
+ }
+ }
+ }
+ // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
+ if (ContainsFPCode) {
+ BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc::getUnknownLoc(),
+ MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
+ ++NumFPKill;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..bd1fea71a6d0
--- /dev/null
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -0,0 +1,1716 @@
+//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a DAG pattern matching instruction selector for X86,
+// converting from a legalized dag to a X86 dag.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-isel"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+#include "llvm/Support/CommandLine.h"
+static cl::opt<bool> AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden);
+
+STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+
+//===----------------------------------------------------------------------===//
+// Pattern Matcher Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
+ /// SDValue's instead of register numbers for the leaves of the matched
+ /// tree.
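+  /// The matched operands describe an address of the general x86 form
+  /// segment:[base + scale*index + disp], where the displacement may also be
+  /// a global, constant-pool, external-symbol, or jump-table reference.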
+ struct X86ISelAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ struct { // This is really a union, discriminated by BaseType!
+ SDValue Reg;
+ int FrameIndex;
+ } Base;
+
+ bool isRIPRel; // RIP as base?
+ unsigned Scale;
+ SDValue IndexReg;
+ int32_t Disp;
+ SDValue Segment;
+ GlobalValue *GV;
+ Constant *CP;
+ const char *ES;
+ int JT;
+ unsigned Align; // CP alignment.
+
+ X86ISelAddressMode()
+ : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
+ Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0) {
+ }
+
+ bool hasSymbolicDisplacement() const {
+ return GV != 0 || CP != 0 || ES != 0 || JT != -1;
+ }
+
+ void dump() {
+ cerr << "X86ISelAddressMode " << this << "\n";
+ cerr << "Base.Reg ";
+ if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump();
+ else cerr << "nul";
+ cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
+ cerr << "isRIPRel " << isRIPRel << " Scale" << Scale << "\n";
+ cerr << "IndexReg ";
+ if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
+ else cerr << "nul";
+ cerr << " Disp " << Disp << "\n";
+ cerr << "GV "; if (GV) GV->dump();
+ else cerr << "nul";
+ cerr << " CP "; if (CP) CP->dump();
+ else cerr << "nul";
+ cerr << "\n";
+ cerr << "ES "; if (ES) cerr << ES; else cerr << "nul";
+ cerr << " JT" << JT << " Align" << Align << "\n";
+ }
+ };
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// ISel - X86 specific code to select X86 machine instructions for
+ /// SelectionDAG operations.
+ ///
+ class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
+ /// TM - Keep a reference to X86TargetMachine.
+ ///
+ X86TargetMachine &TM;
+
+ /// X86Lowering - This object fully describes how to lower LLVM code to an
+ /// X86-specific SelectionDAG.
+ X86TargetLowering &X86Lowering;
+
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ /// CurBB - Current BB being isel'd.
+ ///
+ MachineBasicBlock *CurBB;
+
+ /// OptForSize - If true, selector should try to optimize for code size
+ /// instead of performance.
+ bool OptForSize;
+
+ public:
+ explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel),
+ TM(tm), X86Lowering(*TM.getTargetLowering()),
+ Subtarget(&TM.getSubtarget<X86Subtarget>()),
+ OptForSize(false) {}
+
+ virtual const char *getPassName() const {
+ return "X86 DAG->DAG Instruction Selection";
+ }
+
+ /// InstructionSelect - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelect();
+
+ virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
+
+ virtual
+ bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;
+
+// Include the pieces autogenerated from the target description.
+#include "X86GenDAGISel.inc"
+
+ private:
+ SDNode *Select(SDValue N);
+ SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+
+ bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
+ bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
+ bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
+ bool MatchAddress(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth = 0);
+ bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
+ bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp);
+ bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
+ SDValue N, SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment,
+ SDValue &InChain, SDValue &OutChain);
+ bool TryFoldLoad(SDValue P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
+ void PreprocessForRMW();
+ void PreprocessForFPConvert();
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+
+ void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
+
+ inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
+ CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
+ AM.Base.Reg;
+ Scale = getI8Imm(AM.Scale);
+ Index = AM.IndexReg;
+ // These are 32-bit even in 64-bit mode since RIP relative offset
+ // is 32-bit.
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
+ AM.Align, AM.Disp);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
+ else
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
+
+ if (AM.Segment.getNode())
+ Segment = AM.Segment;
+ else
+ Segment = CurDAG->getRegister(0, MVT::i32);
+ }
+
+ /// getI8Imm - Return a target constant with the specified value, of type
+ /// i8.
+ inline SDValue getI8Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i8);
+ }
+
+ /// getI16Imm - Return a target constant with the specified value, of type
+ /// i16.
+ inline SDValue getI16Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i16);
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getGlobalBaseReg - Return an SDNode that returns the value of
+ /// the global base register. Output instructions required to
+ /// initialize the global base register, if necessary.
+ ///
+ SDNode *getGlobalBaseReg();
+
+#ifndef NDEBUG
+ unsigned Indent;
+#endif
+ };
+}
+
+
+bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+ if (U == Root)
+ switch (U->getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ SDValue Op1 = U->getOperand(1);
+
+      // If the other operand is an 8-bit immediate we should fold the immediate
+ // instead. This reduces code size.
+ // e.g.
+ // movl 4(%esp), %eax
+ // addl $4, %eax
+ // vs.
+ // movl $4, %eax
+ // addl 4(%esp), %eax
+      // The former is 2 bytes shorter. When the immediate is 1, the saving can
+      // be 4 bytes (by using incl %eax).
+ if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
+ if (Imm->getAPIntValue().isSignedIntN(8))
+ return false;
+
+ // If the other operand is a TLS address, we should fold it instead.
+ // This produces
+ // movl %gs:0, %eax
+ // leal i@NTPOFF(%eax), %eax
+ // instead of
+ // movl $i@NTPOFF, %eax
+ // addl %gs:0, %eax
+ // if the block also has an access to a second TLS address this will save
+ // a load.
+ // FIXME: This is probably also true for non TLS addresses.
+ if (Op1.getOpcode() == X86ISD::Wrapper) {
+ SDValue Val = Op1.getOperand(0);
+ if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false;
+ }
+ }
+ }
+
+ // Proceed to 'generic' cycle finder code
+ return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root);
+}
+
+/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
+/// and move load below the TokenFactor. Replace store's chain operand with
+/// load's chain result.
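+/// After this, the load's chain input is the TokenFactor itself, giving the
+/// transformed DAG shape shown in the PreprocessForRMW comment below.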
+static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
+ SDValue Store, SDValue TF) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
+ if (Load.getNode() == TF.getOperand(i).getNode())
+ Ops.push_back(Load.getOperand(0));
+ else
+ Ops.push_back(TF.getOperand(i));
+ CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
+ CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
+ Store.getOperand(2), Store.getOperand(3));
+}
+
+/// isRMWLoad - Return true if N is a load that's part of an RMW sub-DAG.
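+/// That is, a single-use, non-volatile, unindexed (possibly extending) load
+/// whose address matches the store's address and whose chain output feeds the
+/// store's TokenFactor.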
+///
+static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
+ SDValue &Load) {
+ if (N.getOpcode() == ISD::BIT_CONVERT)
+ N = N.getOperand(0);
+
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
+ if (!LD || LD->isVolatile())
+ return false;
+ if (LD->getAddressingMode() != ISD::UNINDEXED)
+ return false;
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
+ return false;
+
+ if (N.hasOneUse() &&
+ N.getOperand(1) == Address &&
+ N.getNode()->isOperandOf(Chain.getNode())) {
+ Load = N;
+ return true;
+ }
+ return false;
+}
+
+/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
+/// operand and move load below the call's chain operand.
+static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
+ SDValue Call, SDValue CallSeqStart) {
+ SmallVector<SDValue, 8> Ops;
+ SDValue Chain = CallSeqStart.getOperand(0);
+ if (Chain.getNode() == Load.getNode())
+ Ops.push_back(Load.getOperand(0));
+ else {
+ assert(Chain.getOpcode() == ISD::TokenFactor &&
+ "Unexpected CallSeqStart chain operand");
+ for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
+ if (Chain.getOperand(i).getNode() == Load.getNode())
+ Ops.push_back(Load.getOperand(0));
+ else
+ Ops.push_back(Chain.getOperand(i));
+ SDValue NewChain =
+ CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
+ MVT::Other, &Ops[0], Ops.size());
+ Ops.clear();
+ Ops.push_back(NewChain);
+ }
+ for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
+ Ops.push_back(CallSeqStart.getOperand(i));
+ CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
+ Load.getOperand(1), Load.getOperand(2));
+ Ops.clear();
+ Ops.push_back(SDValue(Load.getNode(), 1));
+ for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
+ Ops.push_back(Call.getOperand(i));
+ CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
+}
+
+/// isCalleeLoad - Return true if call address is a load and it can be
+/// moved below CALLSEQ_START and the chains leading up to the call.
+/// Return the CALLSEQ_START by reference as a second output.
+static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
+ if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
+ return false;
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
+ if (!LD ||
+ LD->isVolatile() ||
+ LD->getAddressingMode() != ISD::UNINDEXED ||
+ LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ // Now let's find the callseq_start.
+ while (Chain.getOpcode() != ISD::CALLSEQ_START) {
+ if (!Chain.hasOneUse())
+ return false;
+ Chain = Chain.getOperand(0);
+ }
+
+ if (Chain.getOperand(0).getNode() == Callee.getNode())
+ return true;
+ if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
+ Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()))
+ return true;
+ return false;
+}
+
+
+/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
+/// This is only run if not in -O0 mode.
+/// This allows the instruction selector to pick more read-modify-write
+/// instructions. This is a common case:
+///
+/// [Load chain]
+/// ^
+/// |
+/// [Load]
+/// ^ ^
+/// | |
+/// / \-
+/// / |
+/// [TokenFactor] [Op]
+/// ^ ^
+/// | |
+/// \ /
+/// \ /
+/// [Store]
+///
+/// The fact that the store's chain operand differs from the load's chain will
+/// prevent the (store (op (load))) instruction from being selected. We can
+/// transform it to:
+///
+/// [Load chain]
+/// ^
+/// |
+/// [TokenFactor]
+/// ^
+/// |
+/// [Load]
+/// ^ ^
+/// | |
+/// | \-
+/// | |
+/// | [Op]
+/// | ^
+/// | |
+/// \ /
+/// \ /
+/// [Store]
+void X86DAGToDAGISel::PreprocessForRMW() {
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I) {
+ if (I->getOpcode() == X86ISD::CALL) {
+ /// Also try moving call address load from outside callseq_start to just
+ /// before the call to allow it to be folded.
+ ///
+ /// [Load chain]
+ /// ^
+ /// |
+ /// [Load]
+ /// ^ ^
+ /// | |
+ /// / \--
+ /// / |
+ ///[CALLSEQ_START] |
+ /// ^ |
+ /// | |
+ /// [LOAD/C2Reg] |
+ /// | |
+ /// \ /
+ /// \ /
+ /// [CALL]
+ SDValue Chain = I->getOperand(0);
+ SDValue Load = I->getOperand(1);
+ if (!isCalleeLoad(Load, Chain))
+ continue;
+ MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
+ ++NumLoadMoved;
+ continue;
+ }
+
+ if (!ISD::isNON_TRUNCStore(I))
+ continue;
+ SDValue Chain = I->getOperand(0);
+
+ if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
+ continue;
+
+ SDValue N1 = I->getOperand(1);
+ SDValue N2 = I->getOperand(2);
+ if ((N1.getValueType().isFloatingPoint() &&
+ !N1.getValueType().isVector()) ||
+ !N1.hasOneUse())
+ continue;
+
+ bool RModW = false;
+ SDValue Load;
+ unsigned Opcode = N1.getNode()->getOpcode();
+ switch (Opcode) {
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::VECTOR_SHUFFLE: {
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+ RModW = isRMWLoad(N10, Chain, N2, Load);
+ if (!RModW)
+ RModW = isRMWLoad(N11, Chain, N2, Load);
+ break;
+ }
+ case ISD::SUB:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::SUBC:
+ case ISD::SUBE:
+ case X86ISD::SHLD:
+ case X86ISD::SHRD: {
+ SDValue N10 = N1.getOperand(0);
+ RModW = isRMWLoad(N10, Chain, N2, Load);
+ break;
+ }
+ }
+
+ if (RModW) {
+ MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
+ ++NumLoadMoved;
+ }
+ }
+}
+
+
+/// PreprocessForFPConvert - Walk over the dag, lowering fpround and fpextend
+/// nodes that target the FP stack into a store and load through a stack
+/// temporary. This is a gross hack. We would like to simply mark these as
+/// being illegal, but when
+/// we do that, legalize produces these when it expands calls, then expands
+/// these in the same legalize pass. We would like dag combine to be able to
+/// hack on these between the call expansion and the node legalization. As such
+/// this pass basically does "really late" legalization of these inline with the
+/// X86 isel pass.
+void X86DAGToDAGISel::PreprocessForFPConvert() {
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+ if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
+ continue;
+
+ // If the source and destination are SSE registers, then this is a legal
+ // conversion that should not be lowered.
+ MVT SrcVT = N->getOperand(0).getValueType();
+ MVT DstVT = N->getValueType(0);
+ bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
+ bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
+ if (SrcIsSSE && DstIsSSE)
+ continue;
+
+ if (!SrcIsSSE && !DstIsSSE) {
+ // If this is an FPStack extension, it is a noop.
+ if (N->getOpcode() == ISD::FP_EXTEND)
+ continue;
+ // If this is a value-preserving FPStack truncation, it is a noop.
+ if (N->getConstantOperandVal(1))
+ continue;
+ }
+
+ // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+ // FPStack has extload and truncstore. SSE can fold direct loads into other
+ // operations. Based on this, decide what we want to do.
+ MVT MemVT;
+ if (N->getOpcode() == ISD::FP_ROUND)
+ MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+ else
+ MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+ SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+ DebugLoc dl = N->getDebugLoc();
+
+ // FIXME: optimize the case where the src/dest is a load or store?
+ SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
+ N->getOperand(0),
+ MemTmp, NULL, 0, MemVT);
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+ NULL, 0, MemVT);
+
+ // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+ // extload we created. This will cause general havok on the dag because
+ // anything below the conversion could be folded into other existing nodes.
+ // To avoid invalidating 'I', back it up to the convert node.
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+
+ // Now that we did that, the node is dead. Increment the iterator to the
+ // next node to process, then delete N.
+ ++I;
+ CurDAG->DeleteNode(N);
+ }
+}
+
+/// InstructionSelect - This callback is invoked by SelectionDAGISel when it
+/// has created a SelectionDAG for us to codegen.
+void X86DAGToDAGISel::InstructionSelect() {
+ CurBB = BB; // BB can change as result of isel.
+ const Function *F = CurDAG->getMachineFunction().getFunction();
+ OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
+
+ DEBUG(BB->dump());
+ if (OptLevel != CodeGenOpt::None)
+ PreprocessForRMW();
+
+ // FIXME: This should only happen when not compiled with -O0.
+ PreprocessForFPConvert();
+
+ // Codegen the basic block.
+#ifndef NDEBUG
+ DOUT << "===== Instruction selection begins:\n";
+ Indent = 0;
+#endif
+ SelectRoot(*CurDAG);
+#ifndef NDEBUG
+ DOUT << "===== Instruction selection ends:\n";
+#endif
+
+ CurDAG->RemoveDeadNodes();
+}
+
+/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
+/// the main function.
+void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
+ MachineFrameInfo *MFI) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ if (Subtarget->isTargetCygMing())
+ BuildMI(BB, DebugLoc::getUnknownLoc(),
+ TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
+}
+
+void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
+ // If this is main, emit special code for main.
+ MachineBasicBlock *BB = MF.begin();
+ if (Fn.hasExternalLinkage() && Fn.getName() == "main")
+ EmitSpecialCodeForMain(BB, MF.getFrameInfo());
+}
+
+
+bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
+ X86ISelAddressMode &AM) {
+ assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
+ SDValue Segment = N.getOperand(0);
+
+ if (AM.Segment.getNode() == 0) {
+ AM.Segment = Segment;
+ return false;
+ }
+
+ return true;
+}
+
+bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
+ // This optimization is valid because the GNU TLS model defines that
+ // gs:0 (or fs:0 on X86-64) contains its own address.
+ // For more information see http://people.redhat.com/drepper/tls.pdf
+
+ SDValue Address = N.getOperand(1);
+ if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
+ !MatchSegmentBaseAddress (Address, AM))
+ return false;
+
+ return true;
+}
+
+bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
+ bool is64Bit = Subtarget->is64Bit();
+ DOUT << "Wrapper: 64bit " << is64Bit;
+ DOUT << " AM "; DEBUG(AM.dump()); DOUT << "\n";
+
+ // Under X86-64 non-small code model, GV (and friends) are 64-bits.
+ if (is64Bit && (TM.getCodeModel() != CodeModel::Small))
+ return true;
+
+ // Base and index reg must be 0 in order to use rip as base.
+ bool canUsePICRel = !AM.Base.Reg.getNode() && !AM.IndexReg.getNode();
+ if (is64Bit && !canUsePICRel && TM.symbolicAddressesAreRIPRel())
+ return true;
+
+ if (AM.hasSymbolicDisplacement())
+ return true;
+  // If the value is already available in a register but both the base and
+  // index components have been picked, we can't also fold that register into
+  // the addressing mode. Duplicate the GlobalAddress or ConstantPool as a
+  // displacement instead.
+
+ SDValue N0 = N.getOperand(0);
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ uint64_t Offset = G->getOffset();
+ if (!is64Bit || isInt32(AM.Disp + Offset)) {
+ GlobalValue *GV = G->getGlobal();
+ bool isRIPRel = TM.symbolicAddressesAreRIPRel();
+ if (N0.getOpcode() == llvm::ISD::TargetGlobalTLSAddress) {
+ TLSModel::Model model =
+ getTLSModel (GV, TM.getRelocationModel());
+ if (is64Bit && model == TLSModel::InitialExec)
+ isRIPRel = true;
+ }
+ AM.GV = GV;
+ AM.Disp += Offset;
+ AM.isRIPRel = isRIPRel;
+ return false;
+ }
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ uint64_t Offset = CP->getOffset();
+ if (!is64Bit || isInt32(AM.Disp + Offset)) {
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += Offset;
+ AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
+ return false;
+ }
+ } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
+ return false;
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
+ return false;
+ }
+
+ return true;
+}
+
+/// MatchAddress - Add the specified node to the specified addressing mode,
+/// returning true if it cannot be done. This just pattern matches for the
+/// addressing mode.
+bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth) {
+ bool is64Bit = Subtarget->is64Bit();
+ DebugLoc dl = N.getDebugLoc();
+ DOUT << "MatchAddress: "; DEBUG(AM.dump());
+ // Limit recursion.
+ if (Depth > 5)
+ return MatchAddressBase(N, AM);
+
+ // RIP relative addressing: %rip + 32-bit displacement!
+ if (AM.isRIPRel) {
+ if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (!is64Bit || isInt32(AM.Disp + Val)) {
+ AM.Disp += Val;
+ return false;
+ }
+ }
+ return true;
+ }
+
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (!is64Bit || isInt32(AM.Disp + Val)) {
+ AM.Disp += Val;
+ return false;
+ }
+ break;
+ }
+
+ case X86ISD::SegmentBaseAddress:
+ if (!MatchSegmentBaseAddress(N, AM))
+ return false;
+ break;
+
+ case X86ISD::Wrapper:
+ if (!MatchWrapper(N, AM))
+ return false;
+ break;
+
+ case ISD::LOAD:
+ if (!MatchLoad(N, AM))
+ return false;
+ break;
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == X86ISelAddressMode::RegBase
+ && AM.Base.Reg.getNode() == 0) {
+ AM.BaseType = X86ISelAddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
+ }
+ break;
+
+ case ISD::SHL:
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1 || AM.isRIPRel)
+ break;
+
+ if (ConstantSDNode
+ *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
+ unsigned Val = CN->getZExtValue();
+ if (Val == 1 || Val == 2 || Val == 3) {
+ AM.Scale = 1 << Val;
+ SDValue ShVal = N.getNode()->getOperand(0);
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
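+        // For example, (X + 5) << 3 becomes IndexReg = X, Scale = 8, and
+        // Disp += 40.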
+ if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
+ isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
+ AM.IndexReg = ShVal.getNode()->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
+ uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
+ if (!is64Bit || isInt32(Disp))
+ AM.Disp = Disp;
+ else
+ AM.IndexReg = ShVal;
+ } else {
+ AM.IndexReg = ShVal;
+ }
+ return false;
+ }
+ break;
+ }
+
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ // A mul_lohi where we need the low part can be folded as a plain multiply.
+ if (N.getResNo() != 0) break;
+ // FALL THROUGH
+ case ISD::MUL:
+ case X86ISD::MUL_IMM:
+ // X*[3,5,9] -> X+X*[2,4,8]
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base.Reg.getNode() == 0 &&
+ AM.IndexReg.getNode() == 0 &&
+ !AM.isRIPRel) {
+ if (ConstantSDNode
+ *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
+ if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
+ CN->getZExtValue() == 9) {
+ AM.Scale = unsigned(CN->getZExtValue())-1;
+
+ SDValue MulVal = N.getNode()->getOperand(0);
+ SDValue Reg;
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
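+          // For example, (X + 4) * 5 becomes Base = Index = X, Scale = 4, and
+          // Disp += 20.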
+ if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
+ isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
+ Reg = MulVal.getNode()->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
+ uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
+ CN->getZExtValue();
+ if (!is64Bit || isInt32(Disp))
+ AM.Disp = Disp;
+ else
+ Reg = N.getNode()->getOperand(0);
+ } else {
+ Reg = N.getNode()->getOperand(0);
+ }
+
+ AM.IndexReg = AM.Base.Reg = Reg;
+ return false;
+ }
+ }
+ break;
+
+ case ISD::SUB: {
+    // Given A-B, if A can be completely folded into the address (leaving the
+    // index field unused), use -B as the index.
+    // This is a win if A has multiple parts that can be folded into
+    // the address. Also, this saves a mov if the base register has
+    // other uses, since it avoids a two-address sub instruction; however,
+    // it costs an additional mov if the index register has other uses.
+
+ // Test if the LHS of the sub can be folded.
+ X86ISelAddressMode Backup = AM;
+ if (MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) {
+ AM = Backup;
+ break;
+ }
+ // Test if the index field is free for use.
+ if (AM.IndexReg.getNode() || AM.isRIPRel) {
+ AM = Backup;
+ break;
+ }
+ int Cost = 0;
+ SDValue RHS = N.getNode()->getOperand(1);
+ // If the RHS involves a register with multiple uses, this
+ // transformation incurs an extra mov, due to the neg instruction
+ // clobbering its operand.
+ if (!RHS.getNode()->hasOneUse() ||
+ RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
+ RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
+ RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
+ (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
+ RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
+ ++Cost;
+ // If the base is a register with multiple uses, this
+ // transformation may save a mov.
+ if ((AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base.Reg.getNode() &&
+ !AM.Base.Reg.getNode()->hasOneUse()) ||
+ AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ --Cost;
+ // If the folded LHS was interesting, this transformation saves
+ // address arithmetic.
+ if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
+ ((AM.Disp != 0) && (Backup.Disp == 0)) +
+ (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
+ --Cost;
+ // If it doesn't look like it may be an overall win, don't do it.
+ if (Cost >= 0) {
+ AM = Backup;
+ break;
+ }
+
+ // Ok, the transformation is legal and appears profitable. Go for it.
+ SDValue Zero = CurDAG->getConstant(0, N.getValueType());
+ SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
+ AM.IndexReg = Neg;
+ AM.Scale = 1;
+
+ // Insert the new nodes into the topological ordering.
+ if (Zero.getNode()->getNodeId() == -1 ||
+ Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Zero.getNode());
+ Zero.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Neg.getNode()->getNodeId() == -1 ||
+ Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Neg.getNode());
+ Neg.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ return false;
+ }
+
+ case ISD::ADD: {
+ X86ISelAddressMode Backup = AM;
+ if (!MatchAddress(N.getNode()->getOperand(0), AM, Depth+1) &&
+ !MatchAddress(N.getNode()->getOperand(1), AM, Depth+1))
+ return false;
+ AM = Backup;
+ if (!MatchAddress(N.getNode()->getOperand(1), AM, Depth+1) &&
+ !MatchAddress(N.getNode()->getOperand(0), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // If we couldn't fold both operands into the address at the same time,
+ // see if we can just put each operand into a register and fold at least
+ // the add.
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ !AM.Base.Reg.getNode() &&
+ !AM.IndexReg.getNode() &&
+ !AM.isRIPRel) {
+ AM.Base.Reg = N.getNode()->getOperand(0);
+ AM.IndexReg = N.getNode()->getOperand(1);
+ AM.Scale = 1;
+ return false;
+ }
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ X86ISelAddressMode Backup = AM;
+ uint64_t Offset = CN->getSExtValue();
+ // Start with the LHS as an addr mode.
+ if (!MatchAddress(N.getOperand(0), AM, Depth+1) &&
+ // Address could not have picked a GV address for the displacement.
+ AM.GV == NULL &&
+ // On x86-64, the resultant disp must fit in 32-bits.
+ (!is64Bit || isInt32(AM.Disp + Offset)) &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.Disp += Offset;
+ return false;
+ }
+ AM = Backup;
+ }
+ break;
+
+ case ISD::AND: {
+ // Perform some heroic transforms on an and of a constant-count shift
+ // with a constant to enable use of the scaled offset field.
+
+ SDValue Shift = N.getOperand(0);
+ if (Shift.getNumOperands() != 2) break;
+
+ // Scale must not be used already.
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+
+ // Not when RIP is used as the base.
+ if (AM.isRIPRel) break;
+
+ SDValue X = Shift.getOperand(0);
+ ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
+ if (!C1 || !C2) break;
+
+ // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
+ // allows us to convert the shift and and into an h-register extract and
+ // a scaled index.
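+ // e.g. with C1 == 6 and C2 == 0x3fc: (x >> 6) & 0x3fc is the same as
+ // ((x >> 8) & 0xff) << 2, giving an index of (x >> 8) & 0xff with scale 4.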
+ if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
+ unsigned ScaleLog = 8 - C1->getZExtValue();
+ if (ScaleLog > 0 && ScaleLog < 4 &&
+ C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
+ SDValue Eight = CurDAG->getConstant(8, MVT::i8);
+ SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
+ SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+ X, Eight);
+ SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
+ Srl, Mask);
+ SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
+ SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
+ And, ShlCount);
+
+ // Insert the new nodes into the topological ordering.
+ if (Eight.getNode()->getNodeId() == -1 ||
+ Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), Eight.getNode());
+ Eight.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (Mask.getNode()->getNodeId() == -1 ||
+ Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), Mask.getNode());
+ Mask.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (Srl.getNode()->getNodeId() == -1 ||
+ Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
+ Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
+ }
+ if (And.getNode()->getNodeId() == -1 ||
+ And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), And.getNode());
+ And.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (ShlCount.getNode()->getNodeId() == -1 ||
+ ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
+ ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ if (Shl.getNode()->getNodeId() == -1 ||
+ Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), Shl.getNode());
+ Shl.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+ CurDAG->ReplaceAllUsesWith(N, Shl);
+ AM.IndexReg = And;
+ AM.Scale = (1 << ScaleLog);
+ return false;
+ }
+ }
+
+ // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
+ // allows us to fold the shift into this addressing mode.
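+ // e.g. (x << 2) & 0x3fc becomes (x & 0xff) << 2, so the shift folds into
+ // the addressing mode as scale 4 with index (x & 0xff).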
+ if (Shift.getOpcode() != ISD::SHL) break;
+
+ // Not likely to be profitable if either the AND or SHIFT node has more
+ // than one use (unless all uses are for address computation). Besides,
+ // the isel mechanism requires their node ids to be reused.
+ if (!N.hasOneUse() || !Shift.hasOneUse())
+ break;
+
+ // Verify that the shift amount is something we can fold.
+ unsigned ShiftCst = C1->getZExtValue();
+ if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
+ break;
+
+ // Get the new AND mask, this folds to a constant.
+ SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+ SDValue(C2, 0), SDValue(C1, 0));
+ SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
+ NewANDMask);
+ SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
+ NewAND, SDValue(C1, 0));
+
+ // Insert the new nodes into the topological ordering.
+ if (C1->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), C1);
+ C1->setNodeId(X.getNode()->getNodeId());
+ }
+ if (NewANDMask.getNode()->getNodeId() == -1 ||
+ NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
+ NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
+ }
+ if (NewAND.getNode()->getNodeId() == -1 ||
+ NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
+ NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
+ }
+ if (NewSHIFT.getNode()->getNodeId() == -1 ||
+ NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+ CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
+ NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
+ }
+
+ CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
+
+ AM.Scale = 1 << ShiftCst;
+ AM.IndexReg = NewAND;
+ return false;
+ }
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ // If so, check to see if the scale index register is set.
+ if (AM.IndexReg.getNode() == 0 && !AM.isRIPRel) {
+ AM.IndexReg = N;
+ AM.Scale = 1;
+ return false;
+ }
+
+ // Otherwise, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = X86ISelAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+}
+
+ /// SelectAddr - returns true if it is able to pattern-match an addressing
+ /// mode. It returns by reference the operands which make up the maximal
+ /// addressing mode it can match.
+bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment) {
+ X86ISelAddressMode AM;
+ bool Done = false;
+ if (AvoidDupAddrCompute && !N.hasOneUse()) {
+ unsigned Opcode = N.getOpcode();
+ if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex &&
+ Opcode != X86ISD::Wrapper) {
+ // If we are able to fold N into the addressing mode, then we'll allow it
+ // even if N has multiple uses. In general, an address computation is used
+ // as an address by all of its uses. But watch out for CopyToReg uses: that
+ // means the address computation is live-out. It will be computed by an LEA,
+ // so we want to avoid computing the address twice.
+ for (SDNode::use_iterator UI = N.getNode()->use_begin(),
+ UE = N.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::CopyToReg) {
+ MatchAddressBase(N, AM);
+ Done = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!Done && MatchAddress(N, AM))
+ return false;
+
+ MVT VT = N.getValueType();
+ if (AM.BaseType == X86ISelAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
+
+ if (!AM.IndexReg.getNode())
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
+/// match a load whose top elements are either undef or zeros. The load flavor
+/// is derived from the type of N, which is either v4f32 or v2f64.
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
+ SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp, SDValue &Segment,
+ SDValue &InChain,
+ SDValue &OutChain) {
+ if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ InChain = N.getOperand(0).getValue(1);
+ if (ISD::isNON_EXTLoad(InChain.getNode()) &&
+ InChain.getValue(0).hasOneUse() &&
+ N.hasOneUse() &&
+ IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
+ LoadSDNode *LD = cast<LoadSDNode>(InChain);
+ if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ return false;
+ OutChain = LD->getChain();
+ return true;
+ }
+ }
+
+ // Also handle the case where we explicitly require zeros in the top
+ // elements. This is a vector shuffle from the zero vector.
+ if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
+ // Check to see if the top elements are all zeros (or bitcast of zeros).
+ N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ N.getOperand(0).getNode()->hasOneUse() &&
+ ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
+ N.getOperand(0).getOperand(0).hasOneUse()) {
+ // Okay, this is a zero extending load. Fold it.
+ LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
+ if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ return false;
+ OutChain = LD->getChain();
+ InChain = SDValue(LD, 1);
+ return true;
+ }
+ return false;
+}
+
+
+ /// SelectLEAAddr - matches the address and determines if the maximal
+ /// addressing mode it matches can be cost-effectively emitted as an LEA
+ /// instruction.
+bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp) {
+ X86ISelAddressMode AM;
+
+ // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
+ // segments.
+ SDValue Copy = AM.Segment;
+ SDValue T = CurDAG->getRegister(0, MVT::i32);
+ AM.Segment = T;
+ if (MatchAddress(N, AM))
+ return false;
+ assert (T == AM.Segment);
+ AM.Segment = Copy;
+
+ MVT VT = N.getValueType();
+ unsigned Complexity = 0;
+ if (AM.BaseType == X86ISelAddressMode::RegBase)
+ if (AM.Base.Reg.getNode())
+ Complexity = 1;
+ else
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ Complexity = 4;
+
+ if (AM.IndexReg.getNode())
+ Complexity++;
+ else
+ AM.IndexReg = CurDAG->getRegister(0, VT);
+
+ // Don't match just leal(,%reg,2); it's cheaper to do addl %reg, %reg or
+ // use a simple shift.
+ if (AM.Scale > 1)
+ Complexity++;
+
+ // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
+ // into an LEA. This was determined by some experimentation but is by no
+ // means optimal (especially for code size considerations). LEA is nice
+ // because of its three-address nature. Tweak the cost function again when
+ // we can run convertToThreeAddress() at register allocation time.
+ if (AM.hasSymbolicDisplacement()) {
+ // For X86-64, we should always use lea to materialize RIP relative
+ // addresses.
+ if (Subtarget->is64Bit())
+ Complexity = 4;
+ else
+ Complexity += 2;
+ }
+
+ if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
+ Complexity++;
+
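+ // Only emit an LEA when the matched mode is complex enough that a plain
+ // add/shift/mov sequence would not be cheaper; frame indices and (on
+ // x86-64) symbolic displacements are weighted so that they always qualify.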
+ if (Complexity > 2) {
+ SDValue Segment;
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+ }
+ return false;
+}
+
+bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
+ SDValue &Base, SDValue &Scale,
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
+ if (ISD::isNON_EXTLoad(N.getNode()) &&
+ N.hasOneUse() &&
+ IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
+ return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
+ return false;
+}
+
+/// getGlobalBaseReg - Return an SDNode that returns the value of
+/// the global base register. Output instructions required to
+/// initialize the global base register, if necessary.
+///
+SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
+ MachineFunction *MF = CurBB->getParent();
+ unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+static SDNode *FindCallStartFromCall(SDNode *Node) {
+ if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ return FindCallStartFromCall(Node->getOperand(0).getNode());
+}
+
+SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDValue In2L = Node->getOperand(2);
+ SDValue In2H = Node->getOperand(3);
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+ return NULL;
+ SDValue LSI = Node->getOperand(4); // MemOperand
+ const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, LSI, Chain};
+ return CurDAG->getTargetNode(Opc, Node->getDebugLoc(),
+ MVT::i32, MVT::i32, MVT::Other, Ops,
+ array_lengthof(Ops));
+}
+
+SDNode *X86DAGToDAGISel::Select(SDValue N) {
+ SDNode *Node = N.getNode();
+ MVT NVT = Node->getValueType(0);
+ unsigned Opc, MOpc;
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+#ifndef NDEBUG
+ DOUT << std::string(Indent, ' ') << "Selecting: ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent += 2;
+#endif
+
+ if (Node->isMachineOpcode()) {
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "== ";
+ DEBUG(Node->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+#endif
+ return NULL; // Already selected.
+ }
+
+ switch (Opcode) {
+ default: break;
+ case X86ISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case X86ISD::ATOMOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMOR6432);
+ case X86ISD::ATOMXOR64_DAG:
+ return SelectAtomic64(Node, X86::ATOMXOR6432);
+ case X86ISD::ATOMADD64_DAG:
+ return SelectAtomic64(Node, X86::ATOMADD6432);
+ case X86ISD::ATOMSUB64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSUB6432);
+ case X86ISD::ATOMNAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMNAND6432);
+ case X86ISD::ATOMAND64_DAG:
+ return SelectAtomic64(Node, X86::ATOMAND6432);
+ case X86ISD::ATOMSWAP64_DAG:
+ return SelectAtomic64(Node, X86::ATOMSWAP6432);
+
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SMUL_LOHI;
+ if (!isSigned)
+ switch (NVT.getSimpleVT()) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
+ case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
+ case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+ case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
+ }
+ else
+ switch (NVT.getSimpleVT()) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
+ case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
+ case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
+ case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
+ }
+
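+ // One-operand MUL/IMUL implicitly writes the double-width product into
+ // AH:AL, DX:AX, EDX:EAX, or RDX:RAX, so the result registers are fixed
+ // by the value type.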
+ unsigned LoReg, HiReg;
+ switch (NVT.getSimpleVT()) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
+ case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
+ case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
+ case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ // Multiplication is commutative.
+ if (!foldedLoad) {
+ foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ if (foldedLoad)
+ std::swap(N0, N1);
+ }
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ N0, SDValue()).getValue(1);
+
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ InFlag =
+ SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ }
+
+ // Copy the low half of the result, if it is needed.
+ if (!N.getValue(0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(N.getValue(0), Result);
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.getNode()->dump(CurDAG));
+ DOUT << "\n";
+#endif
+ }
+ // Copy the high half of the result, if it is needed.
+ if (!N.getValue(1).use_empty()) {
+ SDValue Result;
+ if (HiReg == X86::AH && Subtarget->is64Bit()) {
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
+ Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
+ MVT::i8, Result, SRIdx), 0);
+ } else {
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ }
+ ReplaceUses(N.getValue(1), Result);
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.getNode()->dump(CurDAG));
+ DOUT << "\n";
+#endif
+ }
+
+#ifndef NDEBUG
+ Indent -= 2;
+#endif
+
+ return NULL;
+ }
+
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SDIVREM;
+ if (!isSigned)
+ switch (NVT.getSimpleVT()) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
+ case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
+ case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
+ case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
+ }
+ else
+ switch (NVT.getSimpleVT()) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
+ case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
+ case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
+ case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
+ }
+
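+ // DIV/IDIV divide the double-width value in AH:AL, DX:AX, EDX:EAX, or
+ // RDX:RAX by the operand, so the high half must be zeroed (unsigned) or
+ // sign-extended (signed) before the divide.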
+ unsigned LoReg, HiReg;
+ unsigned ClrOpcode, SExtOpcode;
+ switch (NVT.getSimpleVT()) {
+ default: assert(0 && "Unsupported VT!");
+ case MVT::i8:
+ LoReg = X86::AL; HiReg = X86::AH;
+ ClrOpcode = 0;
+ SExtOpcode = X86::CBW;
+ break;
+ case MVT::i16:
+ LoReg = X86::AX; HiReg = X86::DX;
+ ClrOpcode = X86::MOV16r0;
+ SExtOpcode = X86::CWD;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX; HiReg = X86::EDX;
+ ClrOpcode = X86::MOV32r0;
+ SExtOpcode = X86::CDQ;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX; HiReg = X86::RDX;
+ ClrOpcode = X86::MOV64r0;
+ SExtOpcode = X86::CQO;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool signBitIsZero = CurDAG->SignBitIsZero(N0);
+
+ SDValue InFlag;
+ if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
+ // Special case for div8: just use a move with zero extension to AX to
+ // clear the upper 8 bits (AH).
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
+ if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
+ Move =
+ SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16,
+ MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ Chain = Move.getValue(1);
+ ReplaceUses(N0.getValue(1), Chain);
+ } else {
+ Move =
+ SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
+ Chain = CurDAG->getEntryNode();
+ }
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
+ InFlag = Chain.getValue(1);
+ } else {
+ InFlag =
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+ LoReg, N0, SDValue()).getValue(1);
+ if (isSigned && !signBitIsZero) {
+ // Sign extend the low part into the high part.
+ InFlag =
+ SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
+ } else {
+ // Zero out the high part, effectively zero extending the input.
+ SDValue ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT),
+ 0);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
+ ClrNode, InFlag).getValue(1);
+ }
+ }
+
+ if (foldedLoad) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ SDNode *CNode =
+ CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+ array_lengthof(Ops));
+ InFlag = SDValue(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
+ } else {
+ InFlag =
+ SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+ }
+
+ // Copy the division (low) result, if it is needed.
+ if (!N.getValue(0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(N.getValue(0), Result);
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.getNode()->dump(CurDAG));
+ DOUT << "\n";
+#endif
+ }
+ // Copy the remainder (high) result, if it is needed.
+ if (!N.getValue(1).use_empty()) {
+ SDValue Result;
+ if (HiReg == X86::AH && Subtarget->is64Bit()) {
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)),
+ 0);
+ // Then truncate it down to i8.
+ SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
+ Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
+ MVT::i8, Result, SRIdx), 0);
+ } else {
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ }
+ ReplaceUses(N.getValue(1), Result);
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.getNode()->dump(CurDAG));
+ DOUT << "\n";
+#endif
+ }
+
+#ifndef NDEBUG
+ Indent -= 2;
+#endif
+
+ return NULL;
+ }
+
+ case ISD::DECLARE: {
+ // Handle DECLARE nodes here because the second operand may have been
+ // wrapped in X86ISD::Wrapper.
+ SDValue Chain = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);
+
+ // FIXME: We need to handle this for VLAs.
+ if (!FINode) {
+ ReplaceUses(N.getValue(0), Chain);
+ return NULL;
+ }
+
+ if (N2.getOpcode() == ISD::ADD &&
+ N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg)
+ N2 = N2.getOperand(1);
+
+ // If N2 is not Wrapper(descriptor), then the llvm.declare is mangled
+ // somehow; just ignore it.
+ if (N2.getOpcode() != X86ISD::Wrapper) {
+ ReplaceUses(N.getValue(0), Chain);
+ return NULL;
+ }
+ GlobalAddressSDNode *GVNode =
+ dyn_cast<GlobalAddressSDNode>(N2.getOperand(0));
+ if (GVNode == 0) {
+ ReplaceUses(N.getValue(0), Chain);
+ return NULL;
+ }
+ SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
+ TLI.getPointerTy());
+ SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(),
+ TLI.getPointerTy());
+ SDValue Ops[] = { Tmp1, Tmp2, Chain };
+ return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
+ MVT::Other, Ops,
+ array_lengthof(Ops));
+ }
+ }
+
+ SDNode *ResNode = SelectCode(N);
+
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ if (ResNode == NULL || ResNode == N.getNode())
+ DEBUG(N.getNode()->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+#endif
+
+ return ResNode;
+}
+
+bool X86DAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1, Op2, Op3, Op4;
+ switch (ConstraintCode) {
+ case 'o': // offsetable ??
+ case 'v': // not offsetable ??
+ default: return true;
+ case 'm': // memory
+ if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ OutOps.push_back(Op2);
+ OutOps.push_back(Op3);
+ OutOps.push_back(Op4);
+ return false;
+}
+
+ /// createX86ISelDag - This pass converts a legalized DAG into an
+ /// X86-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
+ llvm::CodeGenOpt::Level OptLevel) {
+ return new X86DAGToDAGISel(TM, OptLevel);
+}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
new file mode 100644
index 000000000000..882ee3a01f1e
--- /dev/null
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -0,0 +1,8794 @@
+//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool>
+DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
+
+// Forward declarations.
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+ SDValue V2);
+
+X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
+ : TargetLowering(TM) {
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
+ X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+
+ RegInfo = TM.getRegisterInfo();
+ TD = getTargetData();
+
+ // Set up the TargetLowering object.
+
+ // X86 is weird; it always uses i8 for shift amounts and setcc results.
+ setShiftAmountType(MVT::i8);
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setSchedulingPreference(SchedulingForRegPressure);
+ setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
+ setStackPointerRegisterToSaveRestore(X86StackPtr);
+
+ if (Subtarget->isTargetDarwin()) {
+ // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(false);
+ setUseUnderscoreLongJmp(false);
+ } else if (Subtarget->isTargetMingw()) {
+ // MS runtime is weird: it exports _setjmp, but longjmp!
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(false);
+ } else {
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+ }
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, X86::GR8RegisterClass);
+ addRegisterClass(MVT::i16, X86::GR16RegisterClass);
+ addRegisterClass(MVT::i32, X86::GR32RegisterClass);
+ if (Subtarget->is64Bit())
+ addRegisterClass(MVT::i64, X86::GR64RegisterClass);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // We don't accept any truncstore of integer registers.
+ setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+ // SETOEQ and SETUNE require checking two conditions.
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
+
+ // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
+ // operation.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
+ } else if (!UseSoftFloat) {
+ if (X86ScalarSSEf64) {
+ // We have an impenetrably clever algorithm for ui64->double only.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
+ }
+ // We have an algorithm for SSE2, and we turn this into a 64-bit
+ // FILD for other targets.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
+ }
+
+ // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
+
+ if (!UseSoftFloat && !NoImplicitFloat) {
+ // SSE has no i16 to fp conversion, only i32
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
+ }
+ } else {
+ setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
+ }
+
+ // In 32-bit mode these are custom lowered. In 64-bit mode f32 and f64
+ // are Legal; f80 is custom lowered.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
+
+ // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
+ // this operation.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
+
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ }
+
+ // Handle FP_TO_UINT by promoting the destination to a larger signed
+ // conversion.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ } else if (!UseSoftFloat) {
+ if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
+ // Expand FP_TO_UINT into a select.
+ // FIXME: We would like to use a Custom expander here eventually to do
+ // the optimal thing for SSE vs. the default expansion in the legalizer.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
+ else
+ // With SSE3 we can use fisttpll to convert to a signed i64; without
+ // SSE, we're stuck with a fistpll.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
+ }
+
+ // TODO: when we have SSE, these could be more efficient, by using movd/movq.
+ if (!X86ScalarSSEf64) {
+ setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
+ setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
+ }
+
+ // Scalar integer divide and remainder are lowered to use operations that
+ // produce two results, to match the available instructions. This exposes
+ // the two-result form to trivial CSE, which is able to combine x/y and x%y
+ // into a single instruction.
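+ // For example, an x/y and an x%y with the same operands CSE into one
+ // ISD::SDIVREM node, which is then selected as a single idiv.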
+ //
+ // Scalar integer multiply-high is also lowered to use two-result
+ // operations, to match the available instructions. However, plain multiply
+ // (low) operations are left as Legal, as there are single-result
+ // instructions for this in x86. Using the two-result multiply instructions
+ // when both high and low results are needed must be arranged by dagcombine.
+ setOperationAction(ISD::MULHS , MVT::i8 , Expand);
+ setOperationAction(ISD::MULHU , MVT::i8 , Expand);
+ setOperationAction(ISD::SDIV , MVT::i8 , Expand);
+ setOperationAction(ISD::UDIV , MVT::i8 , Expand);
+ setOperationAction(ISD::SREM , MVT::i8 , Expand);
+ setOperationAction(ISD::UREM , MVT::i8 , Expand);
+ setOperationAction(ISD::MULHS , MVT::i16 , Expand);
+ setOperationAction(ISD::MULHU , MVT::i16 , Expand);
+ setOperationAction(ISD::SDIV , MVT::i16 , Expand);
+ setOperationAction(ISD::UDIV , MVT::i16 , Expand);
+ setOperationAction(ISD::SREM , MVT::i16 , Expand);
+ setOperationAction(ISD::UREM , MVT::i16 , Expand);
+ setOperationAction(ISD::MULHS , MVT::i32 , Expand);
+ setOperationAction(ISD::MULHU , MVT::i32 , Expand);
+ setOperationAction(ISD::SDIV , MVT::i32 , Expand);
+ setOperationAction(ISD::UDIV , MVT::i32 , Expand);
+ setOperationAction(ISD::SREM , MVT::i32 , Expand);
+ setOperationAction(ISD::UREM , MVT::i32 , Expand);
+ setOperationAction(ISD::MULHS , MVT::i64 , Expand);
+ setOperationAction(ISD::MULHU , MVT::i64 , Expand);
+ setOperationAction(ISD::SDIV , MVT::i64 , Expand);
+ setOperationAction(ISD::UDIV , MVT::i64 , Expand);
+ setOperationAction(ISD::SREM , MVT::i64 , Expand);
+ setOperationAction(ISD::UREM , MVT::i64 , Expand);
+
+ setOperationAction(ISD::BR_JT , MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND , MVT::Other, Custom);
+ setOperationAction(ISD::BR_CC , MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f64 , Expand);
+ setOperationAction(ISD::FREM , MVT::f80 , Expand);
+ setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
+
+ setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
+ }
+
+ setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
+ setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
+
+ // These should be promoted to a larger select which is supported.
+ setOperationAction(ISD::SELECT , MVT::i1 , Promote);
+ setOperationAction(ISD::SELECT , MVT::i8 , Promote);
+ // X86 wants to expand cmov itself.
+ setOperationAction(ISD::SELECT , MVT::i16 , Custom);
+ setOperationAction(ISD::SELECT , MVT::i32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f32 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f64 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f80 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i8 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i16 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f32 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f80 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SELECT , MVT::i64 , Custom);
+ setOperationAction(ISD::SETCC , MVT::i64 , Custom);
+ }
+ // X86 ret instruction may pop stack.
+ setOperationAction(ISD::RET , MVT::Other, Custom);
+ setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
+
+ // Darwin ABI issue.
+ setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
+ setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
+ setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
+ setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
+ }
+ // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
+ setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasSSE1())
+ setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
+
+ if (!Subtarget->hasSSE2())
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+
+ // Expand certain atomics
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+
+ if (!Subtarget->is64Bit()) {
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ }
+
+ // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion.
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ // FIXME - use subtarget debug flags
+ if (!Subtarget->isTargetDarwin() &&
+ !Subtarget->isTargetELF() &&
+ !Subtarget->isTargetCygMing()) {
+ setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ if (Subtarget->is64Bit()) {
+ setExceptionPointerRegister(X86::RAX);
+ setExceptionSelectorRegister(X86::RDX);
+ } else {
+ setExceptionPointerRegister(X86::EAX);
+ setExceptionSelectorRegister(X86::EDX);
+ }
+ setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
+ setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
+
+ setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::VAARG , MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ } else {
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+ if (Subtarget->isTargetCygMing())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
+ if (!UseSoftFloat && X86ScalarSSEf64) {
+ // f32 and f64 use SSE.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+ addRegisterClass(MVT::f64, X86::FR64RegisterClass);
+
+ // Use ANDPD to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f64, Custom);
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f64, Custom);
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ // Use ANDPD and ORPD to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ // Expand FP immediates into loads from the stack, except for the special
+ // cases we handle.
+ addLegalFPImmediate(APFloat(+0.0)); // xorpd
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ } else if (!UseSoftFloat && X86ScalarSSEf32) {
+ // Use SSE for f32, x87 for f64.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+
+ // Use ANDPS to simulate FABS.
+ setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+ // Use XORP to simulate FNEG.
+ setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+
+ // Use ANDPS and ORPS to simulate FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // We don't support sin/cos/fmod
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+
+ // Special cases we handle for FP constants.
+ addLegalFPImmediate(APFloat(+0.0f)); // xorps
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ }
+ } else if (!UseSoftFloat) {
+ // f32 and f64 in x87.
+ // Set up the FP register classes.
+ addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+ addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
+
+ setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+ setOperationAction(ISD::UNDEF, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+ }
+ addLegalFPImmediate(APFloat(+0.0)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+ addLegalFPImmediate(APFloat(+0.0f)); // FLD0
+ addLegalFPImmediate(APFloat(+1.0f)); // FLD1
+ addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
+ addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
+ }
+
+ // Long double always uses X87.
+ if (!UseSoftFloat) {
+ addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
+ setOperationAction(ISD::UNDEF, MVT::f80, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
+ {
+ bool ignored;
+ APFloat TmpFlt(+0.0);
+ TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &ignored);
+ addLegalFPImmediate(TmpFlt); // FLD0
+ TmpFlt.changeSign();
+ addLegalFPImmediate(TmpFlt); // FLD0/FCHS
+ APFloat TmpFlt2(+1.0);
+ TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &ignored);
+ addLegalFPImmediate(TmpFlt2); // FLD1
+ TmpFlt2.changeSign();
+ addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
+ }
+
+ if (!UnsafeFPMath) {
+ setOperationAction(ISD::FSIN , MVT::f80 , Expand);
+ setOperationAction(ISD::FCOS , MVT::f80 , Expand);
+ }
+ }
+
+ // Always use a library call for pow.
+ setOperationAction(ISD::FPOW , MVT::f32 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f64 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f80 , Expand);
+
+ setOperationAction(ISD::FLOG, MVT::f80, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f80, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f80, Expand);
+ setOperationAction(ISD::FEXP, MVT::f80, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f80, Expand);
+
+ // First set operation action for all vector types to either promote
+ // (for widening) or expand (for scalarization). Then we will selectively
+ // turn on ones that can be effectively codegen'd.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ // FIXME: In order to prevent SSE instructions from being expanded into MMX
+ // ones with -msoft-float, disable use of MMX as well.
+ if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) {
+ addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v2f32, X86::VR64RegisterClass);
+ addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);
+
+ setOperationAction(ISD::ADD, MVT::v8i8, Legal);
+ setOperationAction(ISD::ADD, MVT::v4i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v2i32, Legal);
+ setOperationAction(ISD::ADD, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::SUB, MVT::v8i8, Legal);
+ setOperationAction(ISD::SUB, MVT::v4i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v2i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i16, Legal);
+
+ setOperationAction(ISD::AND, MVT::v8i8, Promote);
+ AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64);
+ setOperationAction(ISD::AND, MVT::v4i16, Promote);
+ AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64);
+ setOperationAction(ISD::AND, MVT::v2i32, Promote);
+ AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::AND, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::OR, MVT::v8i8, Promote);
+ AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64);
+ setOperationAction(ISD::OR, MVT::v4i16, Promote);
+ AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64);
+ setOperationAction(ISD::OR, MVT::v2i32, Promote);
+ AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::OR, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::XOR, MVT::v8i8, Promote);
+ AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64);
+ setOperationAction(ISD::XOR, MVT::v4i16, Promote);
+ AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64);
+ setOperationAction(ISD::XOR, MVT::v2i32, Promote);
+ AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::XOR, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::LOAD, MVT::v8i8, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v2f32, MVT::v1i64);
+ setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
+
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
+
+ setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8i8, Promote);
+ setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
+ setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
+ setOperationAction(ISD::SELECT, MVT::v1i64, Custom);
+ }
+
+ if (!UseSoftFloat && Subtarget->hasSSE1()) {
+ addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
+ }
+
+ if (!UseSoftFloat && Subtarget->hasSSE2()) {
+ addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
+
+ // FIXME: Unfortunately -soft-float and -no-implicit-float mean XMM
+ // registers cannot be used even for integer operations.
+ addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
+ addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
+
+ setOperationAction(ISD::ADD, MVT::v16i8, Legal);
+ setOperationAction(ISD::ADD, MVT::v8i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v4i32, Legal);
+ setOperationAction(ISD::ADD, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i8, Legal);
+ setOperationAction(ISD::SUB, MVT::v8i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v4i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v8i16, Legal);
+ setOperationAction(ISD::FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
+
+ setOperationAction(ISD::VSETCC, MVT::v2f64, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v16i8, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v8i16, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
+ for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+ // Do not attempt to custom lower non-power-of-2 vectors
+ if (!isPowerOf2_32(VT.getVectorNumElements()))
+ continue;
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ }
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+ }
+
+ // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
+ for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
+ setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
+ AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64);
+ }
+
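+    // Expand f64 -> f32 truncating stores into an FP_ROUND followed by a
+    // normal f32 store.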
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom lower v2i64 and v2f64 selects.
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
+ setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
+ setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+
+ }
+
+ if (Subtarget->hasSSE41()) {
+ // FIXME: Do we need to handle scalar-to-vector here?
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+
+    // i8 and i16 vectors are custom, because the source register and source
+    // memory operand types are not the same width.  f32 vectors are
+    // custom since the immediate controlling the insert encodes additional
+    // information.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
+ }
+ }
+
+ if (Subtarget->hasSSE42()) {
+ setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
+ }
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // Add/Sub/Mul with overflow operations are custom lowered.
+ setOperationAction(ISD::SADDO, MVT::i32, Custom);
+ setOperationAction(ISD::SADDO, MVT::i64, Custom);
+ setOperationAction(ISD::UADDO, MVT::i32, Custom);
+ setOperationAction(ISD::UADDO, MVT::i64, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i64, Custom);
+ setOperationAction(ISD::USUBO, MVT::i32, Custom);
+ setOperationAction(ISD::USUBO, MVT::i64, Custom);
+ setOperationAction(ISD::SMULO, MVT::i32, Custom);
+ setOperationAction(ISD::SMULO, MVT::i64, Custom);
+ setOperationAction(ISD::UMULO, MVT::i32, Custom);
+ setOperationAction(ISD::UMULO, MVT::i64, Custom);
+
+ if (!Subtarget->is64Bit()) {
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, 0);
+ setLibcallName(RTLIB::SRL_I128, 0);
+ setLibcallName(RTLIB::SRA_I128, 0);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::STORE);
+ if (Subtarget->is64Bit())
+ setTargetDAGCombine(ISD::MUL);
+
+ computeRegisterProperties();
+
+ // FIXME: These should be based on subtarget info. Plus, the values should
+  // be smaller when we are optimizing for size.
+ maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
+ maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
+ maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
+ allowUnalignedMemoryAccesses = true; // x86 supports it!
+ setPrefLoopAlignment(16);
+ benefitFromCodePlacementOpt = true;
+}
+
+
+MVT X86TargetLowering::getSetCCResultType(MVT VT) const {
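+  // x86 SETcc instructions produce a single byte (0 or 1), so use i8.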
+ return MVT::i8;
+}
+
+
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
+ if (MaxAlign == 16)
+ return;
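+  // A 128-bit vector anywhere inside the aggregate forces 16-byte alignment;
+  // recurse through arrays and structs looking for one.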
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (VTy->getBitWidth() == 128)
+ MaxAlign = 16;
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(STy->getElementType(i), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == 16)
+ break;
+ }
+ }
+ return;
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. For X86, aggregates
+/// that contain SSE vectors are placed at 16-byte boundaries while the rest
+/// are at 4-byte boundaries.
+unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ if (Subtarget->is64Bit()) {
+ // Max of 8 and alignment of type.
+ unsigned TyAlign = TD->getABITypeAlignment(Ty);
+ if (TyAlign > 8)
+ return TyAlign;
+ return 8;
+ }
+
+ unsigned Align = 4;
+ if (Subtarget->hasSSE1())
+ getMaxByValAlign(Ty, Align);
+ return Align;
+}
+
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
+/// determining it.
+MVT
+X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
+ bool isSrcConst, bool isSrcStr) const {
+ // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
+ // linux. This is because the stack realignment code can't handle certain
+ // cases like PR2962. This should be removed when PR2962 is fixed.
+ if (!NoImplicitFloat && Subtarget->getStackAlignment() >= 16) {
+ if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
+ return MVT::v4i32;
+ if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
+ return MVT::v4f32;
+ }
+ if (Subtarget->is64Bit() && Size >= 8)
+ return MVT::i64;
+ return MVT::i32;
+}
+
+/// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
+/// jump table.
+SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ if (usesGlobalOffsetTable())
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+ if (!Subtarget->isPICStyleRIPRel())
+    // This doesn't have a DebugLoc associated with it, but it is not really
+    // the same as a Register.
+ return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(),
+ getPointerTy());
+ return Table;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "X86GenCallingConv.inc"
+
+/// LowerRET - Lower an ISD::RET node.
+SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+ SDValue Chain = Op.getOperand(0);
+
+ // Handle tail call return.
+ Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL);
+ if (Chain.getOpcode() == X86ISD::TAILCALL) {
+ SDValue TailCall = Chain;
+ SDValue TargetAddress = TailCall.getOperand(1);
+ SDValue StackAdjustment = TailCall.getOperand(2);
+ assert(((TargetAddress.getOpcode() == ISD::Register &&
+ (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
+ cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
+ TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
+ TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
+ "Expecting an global address, external symbol, or register");
+ assert(StackAdjustment.getOpcode() == ISD::Constant &&
+ "Expecting a const value");
+
+ SmallVector<SDValue,8> Operands;
+ Operands.push_back(Chain.getOperand(0));
+ Operands.push_back(TargetAddress);
+ Operands.push_back(StackAdjustment);
+ // Copy registers used by the call. Last operand is a flag so it is not
+ // copied.
+ for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
+ Operands.push_back(Chain.getOperand(i));
+ }
+ return DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, &Operands[0],
+ Operands.size());
+ }
+
+ // Regular return.
+ SDValue Flag;
+
+ SmallVector<SDValue, 6> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ // Operand #1 = Bytes To Pop
+ RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDValue ValToCopy = Op.getOperand(i*2+1);
+
+ // Returns in ST0/ST1 are handled specially: these are pushed as operands to
+ // the RET instruction and handled by the FP Stackifier.
+ if (VA.getLocReg() == X86::ST0 ||
+ VA.getLocReg() == X86::ST1) {
+ // If this is a copy from an xmm register to ST(0), use an FPExtend to
+ // change the value to the FP stack register class.
+ if (isScalarFPTypeInSSEReg(VA.getValVT()))
+ ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
+ RetOps.push_back(ValToCopy);
+ // Don't emit a copytoreg.
+ continue;
+ }
+
+ // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
+ // which is returned in RAX / RDX.
+ if (Subtarget->is64Bit()) {
+ MVT ValVT = ValToCopy.getValueType();
+ if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
+ ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
+ if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
+ ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy);
+ }
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into %rax.
+ if (Subtarget->is64Bit() &&
+ DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ FuncInfo->setSRetReturnReg(Reg);
+ }
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+
+ Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(X86ISD::RET_FLAG, dl,
+ MVT::Other, &RetOps[0], RetOps.size());
+}
+
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of the physical registers.  This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. This returns an SDNode with the same number of values as the
+/// ISD::CALL.
+SDNode *X86TargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG) {
+
+ DebugLoc dl = TheCall->getDebugLoc();
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ bool isVarArg = TheCall->isVarArg();
+ bool Is64Bit = Subtarget->is64Bit();
+ CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
+
+ SmallVector<SDValue, 8> ResultVals;
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ MVT CopyVT = VA.getValVT();
+
+ // If this is x86-64, and we disabled SSE, we can't return FP values
+ if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
+ ((Is64Bit || TheCall->isInreg()) && !Subtarget->hasSSE1())) {
+ cerr << "SSE register return with SSE disabled\n";
+ exit(1);
+ }
+
+ // If this is a call to a function that returns an fp value on the floating
+ // point stack, but where we prefer to use the value in xmm registers, copy
+ // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
+ if ((VA.getLocReg() == X86::ST0 ||
+ VA.getLocReg() == X86::ST1) &&
+ isScalarFPTypeInSSEReg(VA.getValVT())) {
+ CopyVT = MVT::f80;
+ }
+
+ SDValue Val;
+ if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
+ // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64.
+ if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ MVT::v2i64, InFlag).getValue(1);
+ Val = Chain.getValue(0);
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
+ Val, DAG.getConstant(0, MVT::i64));
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ MVT::i64, InFlag).getValue(1);
+ Val = Chain.getValue(0);
+ }
+ Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val);
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+ CopyVT, InFlag).getValue(1);
+ Val = Chain.getValue(0);
+ }
+ InFlag = Chain.getValue(2);
+
+ if (CopyVT != VA.getValVT()) {
+      // Round the f80 to the right size, which also moves it to the
+      // appropriate xmm register.
+ Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
+ }
+
+ ResultVals.push_back(Val);
+ }
+
+ // Merge everything together with a MERGE_VALUES node.
+ ResultVals.push_back(Chain);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size()).getNode();
+}
+
+
+//===----------------------------------------------------------------------===//
+// C & StdCall & Fast Calling Convention implementation
+//===----------------------------------------------------------------------===//
+// The StdCall calling convention is the standard convention for many Windows
+// API routines. It differs from the C calling convention only slightly: the
+// callee cleans up the stack instead of the caller, and symbols are decorated
+// (e.g. _name@N, where N is the argument size in bytes). It does not support
+// any vector arguments.
+// For info on the fast calling convention, see the Fast Calling Convention
+// (tail call) implementation in LowerX86_32FastCCCallTo.
+
+/// CallIsStructReturn - Determines whether a CALL node uses struct return
+/// semantics.
+static bool CallIsStructReturn(CallSDNode *TheCall) {
+ unsigned NumOps = TheCall->getNumArgs();
+ if (!NumOps)
+ return false;
+
+ return TheCall->getArgFlags(0).isSRet();
+}
+
+/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct
+/// return semantics.
+static bool ArgsAreStructReturn(SDValue Op) {
+ unsigned NumArgs = Op.getNode()->getNumValues() - 1;
+ if (!NumArgs)
+ return false;
+
+ return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
+}
+
+/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
+/// the callee to pop its own arguments. Callee pop is necessary to support tail
+/// calls.
+bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
+ if (IsVarArg)
+ return false;
+
+ switch (CallingConv) {
+ default:
+ return false;
+ case CallingConv::X86_StdCall:
+ return !Subtarget->is64Bit();
+ case CallingConv::X86_FastCall:
+ return !Subtarget->is64Bit();
+ case CallingConv::Fast:
+ return PerformTailCallOpt;
+ }
+}
+
+/// CCAssignFnForNode - Selects the correct CCAssignFn for the
+/// given CallingConvention value.
+CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
+ if (Subtarget->is64Bit()) {
+ if (Subtarget->isTargetWin64())
+ return CC_X86_Win64_C;
+ else if (CC == CallingConv::Fast && PerformTailCallOpt)
+ return CC_X86_64_TailCall;
+ else
+ return CC_X86_64_C;
+ }
+
+ if (CC == CallingConv::X86_FastCall)
+ return CC_X86_32_FastCall;
+ else if (CC == CallingConv::Fast)
+ return CC_X86_32_FastCC;
+ else
+ return CC_X86_32_C;
+}
+
+/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to
+/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node.
+NameDecorationStyle
+X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (CC == CallingConv::X86_FastCall)
+ return FastCall;
+ else if (CC == CallingConv::X86_StdCall)
+ return StdCall;
+ return None;
+}
+
+
+/// CallRequiresGOTPtrInReg - Check whether the call requires the GOT pointer
+/// to be loaded into a register before calling.
+bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
+ return !IsTailCall && !Is64Bit &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT();
+}
+
+/// CallRequiresFnAddressInReg - Check whether the call requires the function
+/// address to be loaded in a register.
+bool
+X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
+ return !Is64Bit && IsTailCall &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address specified
+/// by "Src" to address "Dst" with size and alignment information specified by
+/// the specific parameter attribute. The copy will be passed as a byval
+/// function parameter.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*AlwaysInline=*/true, NULL, 0, NULL, 0);
+}
+
+SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo *MFI,
+ unsigned CC,
+ SDValue Root, unsigned i) {
+ // Create the nodes corresponding to a load from this parameter slot.
+ ISD::ArgFlagsTy Flags =
+ cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags();
+ bool AlwaysUseMutable = (CC==CallingConv::Fast) && PerformTailCallOpt;
+ bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+
+ // FIXME: For now, all byval parameter objects are marked mutable. This can be
+ // changed with more analysis.
+  // In the case of tail call optimization, mark all arguments mutable, since
+  // they could be overwritten when the tail call's arguments are lowered.
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ if (Flags.isByVal())
+ return FIN;
+ return DAG.getLoad(VA.getValVT(), Op.getDebugLoc(), Root, FIN,
+ PseudoSourceValue::getFixedStack(FI), 0);
+}
+
+SDValue
+X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ DebugLoc dl = Op.getDebugLoc();
+
+ const Function* Fn = MF.getFunction();
+ if (Fn->hasExternalLinkage() &&
+ Subtarget->isTargetCygMing() &&
+ Fn->getName() == "main")
+ FuncInfo->setForceFramePointer(true);
+
+ // Decorate the function name.
+ FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ SDValue Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ unsigned CC = MF.getFunction()->getCallingConv();
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsWin64 = Subtarget->isTargetWin64();
+
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC));
+
+ SmallVector<SDValue, 8> ArgValues;
+ unsigned LastVal = ~0U;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
+ // places.
+ assert(VA.getValNo() != LastVal &&
+ "Don't support value assigned to multiple locs yet");
+ LastVal = VA.getValNo();
+
+ if (VA.isRegLoc()) {
+ MVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC = NULL;
+ if (RegVT == MVT::i32)
+ RC = X86::GR32RegisterClass;
+ else if (Is64Bit && RegVT == MVT::i64)
+ RC = X86::GR64RegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = X86::FR32RegisterClass;
+ else if (RegVT == MVT::f64)
+ RC = X86::FR64RegisterClass;
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
+ RC = X86::VR128RegisterClass;
+ else if (RegVT.isVector()) {
+ assert(RegVT.getSizeInBits() == 64);
+ if (!Is64Bit)
+ RC = X86::VR64RegisterClass; // MMX values are passed in MMXs.
+ else {
+ // Darwin calling convention passes MMX values in either GPRs or
+ // XMMs in x86-64. Other targets pass them in memory.
+ if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
+ RC = X86::VR128RegisterClass; // MMX values are passed in XMMs.
+ RegVT = MVT::v2i64;
+ } else {
+ RC = X86::GR64RegisterClass; // v1i64 values are passed in GPRs.
+ RegVT = MVT::i64;
+ }
+ }
+ } else {
+ assert(0 && "Unknown argument type!");
+ }
+
+ unsigned Reg = DAG.getMachineFunction().addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it is really passed promoted to 32
+ // bits. Insert an assert[sz]ext to capture this, then truncate to the
+ // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+
+ if (VA.getLocInfo() != CCValAssign::Full)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+ // Handle MMX values passed in GPRs.
+ if (Is64Bit && RegVT != VA.getLocVT()) {
+ if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
+ else if (RC == X86::VR128RegisterClass) {
+ ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
+ ArgValue, DAG.getConstant(0, MVT::i64));
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
+ }
+ }
+
+ ArgValues.push_back(ArgValue);
+ } else {
+ assert(VA.isMemLoc());
+ ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
+ }
+ }
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. Save the argument into
+ // a virtual register so that we can access it from the return points.
+ if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+ FuncInfo->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]);
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root);
+ }
+
+ unsigned StackSize = CCInfo.getNextStackOffset();
+ // align stack specially for tail calls
+ if (PerformTailCallOpt && CC == CallingConv::Fast)
+ StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
+
+  // If the function takes a variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ if (Is64Bit || CC != CallingConv::X86_FastCall) {
+ VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
+ }
+ if (Is64Bit) {
+ unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
+
+ // FIXME: We should really autogenerate these arrays
+ static const unsigned GPR64ArgRegsWin64[] = {
+ X86::RCX, X86::RDX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegsWin64[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
+ };
+ static const unsigned GPR64ArgRegs64Bit[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegs64Bit[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ const unsigned *GPR64ArgRegs, *XMMArgRegs;
+
+ if (IsWin64) {
+ TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
+ GPR64ArgRegs = GPR64ArgRegsWin64;
+ XMMArgRegs = XMMArgRegsWin64;
+ } else {
+ TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
+ GPR64ArgRegs = GPR64ArgRegs64Bit;
+ XMMArgRegs = XMMArgRegs64Bit;
+ }
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
+ TotalNumIntRegs);
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
+ TotalNumXMMRegs);
+
+ assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
+ "SSE register cannot be used when SSE is disabled!");
+ assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloat) &&
+ "SSE register cannot be used when SSE is disabled!");
+ if (UseSoftFloat || NoImplicitFloat || !Subtarget->hasSSE1())
+        // If SSE is disabled (e.g. for kernel code), don't spill any XMM
+        // vararg registers to the stack.
+ TotalNumXMMRegs = 0;
+
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so they
+      // may be loaded by dereferencing the result of va_next.
+ VarArgsGPOffset = NumIntRegs * 8;
+ VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
+ RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
+ TotalNumXMMRegs * 16, 16);
+
+ // Store the integer parameter registers.
+ SmallVector<SDValue, 8> MemOps;
+ SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+ SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(VarArgsGPOffset));
+ for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
+ unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
+ X86::GR64RegisterClass);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getIntPtrConstant(8));
+ }
+
+ // Now store the XMM (fp + vector) parameter registers.
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(VarArgsFPOffset));
+ for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
+ unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
+ X86::VR128RegisterClass);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::v4f32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getIntPtrConstant(16));
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+ }
+
+ ArgValues.push_back(Root);
+
+ // Some CCs need callee pop.
+ if (IsCalleePop(isVarArg, CC)) {
+ BytesToPopOnReturn = StackSize; // Callee pops everything.
+ BytesCallerReserves = 0;
+ } else {
+ BytesToPopOnReturn = 0; // Callee pops nothing.
+ // If this is an sret function, the return should pop the hidden pointer.
+ if (!Is64Bit && CC != CallingConv::Fast && ArgsAreStructReturn(Op))
+ BytesToPopOnReturn = 4;
+ BytesCallerReserves = StackSize;
+ }
+
+ if (!Is64Bit) {
+ RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only.
+ if (CC == CallingConv::X86_FastCall)
+      VarArgsFrameIndex = 0xAAAAAAA; // fastcall functions can't have varargs.
+ }
+
+ FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
+
+ // Return the new list of results.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+SDValue
+X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
+ const SDValue &StackPtr,
+ const CCValAssign &VA,
+ SDValue Chain,
+ SDValue Arg, ISD::ArgFlagsTy Flags) {
+ DebugLoc dl = TheCall->getDebugLoc();
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ if (Flags.isByVal()) {
+ return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
+ }
+ return DAG.getStore(Chain, dl, Arg, PtrOff,
+ PseudoSourceValue::getStack(), LocMemOffset);
+}
+
+/// EmitTailCallLoadRetAddr - Emit a load of the return address if tail call
+/// optimization is performed and it is required.
+SDValue
+X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
+ SDValue &OutRetAddr,
+ SDValue Chain,
+ bool IsTailCall,
+ bool Is64Bit,
+ int FPDiff,
+ DebugLoc dl) {
+ if (!IsTailCall || FPDiff==0) return Chain;
+
+ // Adjust the Return address stack slot.
+ MVT VT = getPointerTy();
+ OutRetAddr = getReturnAddressFrameIndex(DAG);
+
+ // Load the "old" Return address.
+ OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0);
+ return SDValue(OutRetAddr.getNode(), 1);
+}
+
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
+/// optimization is performed and it is required (FPDiff!=0).
+static SDValue
+EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
+ SDValue Chain, SDValue RetAddrFrIdx,
+ bool Is64Bit, int FPDiff, DebugLoc dl) {
+ // Store the return address to the appropriate stack slot.
+ if (!FPDiff) return Chain;
+ // Calculate the new stack slot for the return address.
+ int SlotSize = Is64Bit ? 8 : 4;
+ int NewReturnAddrFI =
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
+ MVT VT = Is64Bit ? MVT::i64 : MVT::i32;
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+ Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
+ PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0);
+ return Chain;
+}
+
+SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ SDValue Chain = TheCall->getChain();
+ unsigned CC = TheCall->getCallingConv();
+ bool isVarArg = TheCall->isVarArg();
+ bool IsTailCall = TheCall->isTailCall() &&
+ CC == CallingConv::Fast && PerformTailCallOpt;
+ SDValue Callee = TheCall->getCallee();
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsStructRet = CallIsStructReturn(TheCall);
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ if (PerformTailCallOpt && CC == CallingConv::Fast)
+ NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
+
+ int FPDiff = 0;
+ if (IsTailCall) {
+ // Lower arguments at fp - stackoffset + fpdiff.
+ unsigned NumBytesCallerPushed =
+ MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
+ FPDiff = NumBytesCallerPushed - NumBytes;
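+    // A negative FPDiff means the callee needs more argument space than the
+    // caller provides, so the return address slot must be moved.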
+
+    // Record the delta by which the return address stack slot must move, but
+    // only if this delta is smaller (more negative) than the previously
+    // recorded one.
+ if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
+ MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SDValue RetAddrFrIdx;
+  // Load return address for tail calls.
+ Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
+ FPDiff, dl);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+
+ // Walk the register/memloc assignments, inserting copies/loads. In the case
+  // of tail call optimization, arguments are handled later.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = TheCall->getArg(i);
+ ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ bool isByVal = Flags.isByVal();
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ if (Is64Bit) {
+ MVT RegVT = VA.getLocVT();
+ if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
+ switch (VA.getLocReg()) {
+ default:
+ break;
+ case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX:
+ case X86::R8: {
+ // Special case: passing MMX values in GPR registers.
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+ break;
+ }
+ case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
+ case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: {
+ // Special case: passing MMX values in XMM registers.
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
+ Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
+ break;
+ }
+ }
+ }
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ if (!IsTailCall || (IsTailCall && isByVal)) {
+ assert(VA.isMemLoc());
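+        // Materialize the stack pointer copy lazily, only when the first
+        // stack argument is lowered.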
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
+ Chain, Arg, Flags));
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into registers.
+ SDValue InFlag;
+  // Tail call byval lowering might overwrite argument registers, so in the case
+  // of tail call optimization the copies to registers are lowered later.
+ if (!IsTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+  // ELF / PIC requires the GOT pointer to be in the EBX register before making
+  // a function call via the PLT.
+ if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) {
+ Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(),
+ getPointerTy()),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+  // If we are tail calling and generating PIC/GOT style code, load the address
+  // of the callee into ecx. The value in ecx is used as the target of the tail
+  // jump. This is done to circumvent the ebx/callee-saved problem for tail
+  // calls on PIC/GOT architectures. Normally we would just put the address of
+  // GOT into ebx and then call target@PLT. But for tail calls, ebx would be
+  // restored (since ebx is callee saved) before jumping to the target@PLT.
+ if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) {
+ // Note: The actual moving to ecx is done further down.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (G && !G->getGlobal()->hasHiddenVisibility() &&
+ !G->getGlobal()->hasProtectedVisibility())
+ Callee = LowerGlobalAddress(Callee, DAG);
+ else if (isa<ExternalSymbolSDNode>(Callee))
+ Callee = LowerExternalSymbol(Callee,DAG);
+ }
+
+ if (Is64Bit && isVarArg) {
+ // From AMD64 ABI document:
+ // For calls that may call functions that use varargs or stdargs
+ // (prototype-less calls or calls to functions containing ellipsis (...) in
+    // the declaration) %al is used as a hidden argument to specify the number
+    // of SSE registers used. The contents of %al do not need to match exactly
+    // the number of registers, but must be an upper bound on the number of SSE
+    // registers used and must be in the range 0 - 8 inclusive.
+
+ // FIXME: Verify this on Win64
+ // Count the number of XMM registers allocated.
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ assert((Subtarget->hasSSE1() || !NumXMMRegs)
+ && "SSE registers cannot be used when SSE is disabled");
+
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
+ DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+
+  // For tail calls, lower the arguments to the 'real' stack slots.
+ if (IsTailCall) {
+ SmallVector<SDValue, 8> MemOpChains2;
+ SDValue FIN;
+ int FI = 0;
+    // Do not flag the preceding CopyToReg nodes together with the following
+    // nodes.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (!VA.isRegLoc()) {
+ assert(VA.isMemLoc());
+ SDValue Arg = TheCall->getArg(i);
+ ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+ // Create frame index.
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
+ uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+ if (Flags.isByVal()) {
+ // Copy relative to framepointer.
+ SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+ getPointerTy());
+ Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
+
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
+ Flags, DAG, dl));
+ } else {
+ // Store relative to framepointer.
+ MemOpChains2.push_back(
+ DAG.getStore(Chain, dl, Arg, FIN,
+ PseudoSourceValue::getFixedStack(FI), 0));
+ }
+ }
+ }
+
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Copy arguments to their registers.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+    InFlag = SDValue();
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
+ FPDiff, dl);
+ }
+
+  // If the callee is a GlobalAddress node (quite common, every direct call is),
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    // We should use an extra load for direct calls to dllimported functions in
+ // non-JIT mode.
+ if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
+ getTargetMachine(), true))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(),
+ G->getOffset());
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ } else if (IsTailCall) {
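+    // Indirect tail call: materialize the target address in a fixed register
+    // (R9 on x86-64, EAX on x86-32) so the tail jump can use it.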
+ unsigned Opc = Is64Bit ? X86::R9 : X86::EAX;
+
+ Chain = DAG.getCopyToReg(Chain, dl,
+ DAG.getRegister(Opc, getPointerTy()),
+ Callee,InFlag);
+ Callee = DAG.getRegister(Opc, getPointerTy());
+ // Add register as live out.
+ DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+
+ if (IsTailCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Returns a chain & a flag for retval copy to use.
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Ops.clear();
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ if (IsTailCall)
+ Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+  // Add an implicit use of the GOT pointer in EBX.
+ if (!IsTailCall && !Is64Bit &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+
+ // Add an implicit use of AL for x86 vararg functions.
+ if (Is64Bit && isVarArg)
+ Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ if (IsTailCall) {
+ assert(InFlag.getNode() &&
+ "Flag must be set. Depend on flag being set in LowerRET");
+ Chain = DAG.getNode(X86ISD::TAILCALL, dl,
+ TheCall->getVTList(), &Ops[0], Ops.size());
+
+ return SDValue(Chain.getNode(), Op.getResNo());
+ }
+
+ Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ unsigned NumBytesForCalleeToPush;
+ if (IsCalleePop(isVarArg, CC))
+ NumBytesForCalleeToPush = NumBytes; // Callee pops everything
+ else if (!Is64Bit && CC != CallingConv::Fast && IsStructRet)
+    // If this is a call to a struct-return function, the callee
+ // pops the hidden struct pointer, so we have to push it back.
+ // This is common for Darwin/X86, Linux & Mingw32 targets.
+ NumBytesForCalleeToPush = 4;
+ else
+ NumBytesForCalleeToPush = 0; // Callee pops nothing.
+
+ // Returns a flag for retval copy to use.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(NumBytesForCalleeToPush,
+ true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
+ Op.getResNo());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Fast Calling Convention (tail call) implementation
+//===----------------------------------------------------------------------===//
+
+//  Like stdcall, the callee cleans up the arguments, except that ECX is
+//  reserved for storing the address of the tail-called function. Only 2
+//  registers are free for argument passing (inreg). Tail call optimization is
+//  performed provided:
+//                * tailcallopt is enabled
+//                * caller/callee are fastcc
+//  On the X86_64 architecture with GOT-style position independent code, only
+//  local (within-module) calls are supported at the moment.
+//  To keep the stack aligned according to the platform ABI, the function
+//  GetAlignedArgumentStackSize ensures that the argument delta is always a
+//  multiple of the stack alignment. (Dynamic linkers need this - darwin's dyld
+//  for example.)
+//  If a tail-called callee has more arguments than the caller, the caller
+//  needs to make sure that there is room to move the RETADDR to. This is
+//  achieved by reserving an area the size of the argument delta right after
+//  the original RETADDR, but before the saved frame pointer or the spilled
+//  registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
+// stack layout:
+// arg1
+// arg2
+// RETADDR
+// [ new RETADDR
+// move area ]
+// (possible EBP)
+// ESI
+// EDI
+// local1 ..
+
+/// GetAlignedArgumentStackSize - Round up the argument stack size so that it
+/// plus the return-address slot stays stack aligned, e.g. 16n + 12 for a
+/// 16-byte alignment requirement with a 4-byte slot.
+unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG& DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ uint64_t AlignMask = StackAlignment - 1;
+ int64_t Offset = StackSize;
+ uint64_t SlotSize = TD->getPointerSize();
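+  // Round Offset up so that Offset plus the return-address slot (SlotSize
+  // bytes) is a multiple of StackAlignment.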
+ if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
+    // The low bits are at most (StackAlignment - SlotSize); just add the
+    // difference.
+ Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
+ } else {
+    // Mask out the low bits, then add one full stack alignment plus
+    // (StackAlignment - SlotSize).
+ Offset = ((~AlignMask) & Offset) + StackAlignment +
+ (StackAlignment-SlotSize);
+ }
+ return Offset;
+}
+
+/// IsEligibleForTailCallOptimization - Check whether a call is eligible for
+/// tail call optimization: the caller/callee calling conventions must match,
+/// currently only fastcc supports tail calls, and the CALL must be immediately
+/// followed by a RET.
+bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
+ SDValue Ret,
+ SelectionDAG& DAG) const {
+ if (!PerformTailCallOpt)
+ return false;
+
+ if (CheckTailCallReturnConstraints(TheCall, Ret)) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned CallerCC = MF.getFunction()->getCallingConv();
+ unsigned CalleeCC= TheCall->getCallingConv();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ SDValue Callee = TheCall->getCallee();
+ // On x86/32Bit PIC/GOT tail calls are supported.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
+ !Subtarget->isPICStyleGOT()|| !Subtarget->is64Bit())
+ return true;
+
+ // Can only do local tail calls (in same module, hidden or protected) on
+ // x86_64 PIC/GOT at the moment.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+ }
+
+ return false;
+}
+
+FastISel *
+X86TargetLowering::createFastISel(MachineFunction &mf,
+ MachineModuleInfo *mmo,
+ DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &vm,
+ DenseMap<const BasicBlock *,
+ MachineBasicBlock *> &bm,
+ DenseMap<const AllocaInst *, int> &am
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &cil
+#endif
+ ) {
+ return X86::createFastISel(mf, mmo, dw, vm, bm, am
+#ifndef NDEBUG
+ , cil
+#endif
+ );
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+
+SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+ if (ReturnAddrIndex == 0) {
+ // Set up a frame object for the return address.
+ uint64_t SlotSize = TD->getPointerSize();
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize);
+ FuncInfo->setRAIndex(ReturnAddrIndex);
+ }
+
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+
+/// TranslateX86CC - Do a one-to-one translation of an ISD::CondCode to the X86
+/// specific condition code, returning the condition code and the LHS/RHS of the
+/// comparison to make.
+static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
+ SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
+ if (!isFP) {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
+ // X > -1 -> X == 0, jump !sign.
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ return X86::COND_NS;
+ } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
+ // X < 0 -> X == 0, jump on sign.
+ return X86::COND_S;
+ } else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
+ // X < 1 -> X <= 0
+ RHS = DAG.getConstant(0, RHS.getValueType());
+ return X86::COND_LE;
+ }
+ }
+
+ switch (SetCCOpcode) {
+ default: assert(0 && "Invalid integer condition!");
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETGT: return X86::COND_G;
+ case ISD::SETGE: return X86::COND_GE;
+ case ISD::SETLT: return X86::COND_L;
+ case ISD::SETLE: return X86::COND_LE;
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETULT: return X86::COND_B;
+ case ISD::SETUGT: return X86::COND_A;
+ case ISD::SETULE: return X86::COND_BE;
+ case ISD::SETUGE: return X86::COND_AE;
+ }
+ }
+
+ // First determine if it is required or is profitable to flip the operands.
+
+ // If LHS is a foldable load, but RHS is not, flip the condition.
+ if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
+ !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
+ SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
+ std::swap(LHS, RHS);
+ }
+
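+  // Swap the operands for these predicates so that they can be expressed with
+  // the "above"/"below" condition codes checked below.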
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ std::swap(LHS, RHS);
+ break;
+ }
+
+ // On a floating point condition, the flags are set as follows:
+ // ZF PF CF op
+ // 0 | 0 | 0 | X > Y
+ // 0 | 0 | 1 | X < Y
+ // 1 | 0 | 0 | X == Y
+ // 1 | 1 | 1 | unordered
+ switch (SetCCOpcode) {
+ default: assert(0 && "Condcode should be pre-legalized away");
+ case ISD::SETUEQ:
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETOLT: // flipped
+ case ISD::SETOGT:
+ case ISD::SETGT: return X86::COND_A;
+ case ISD::SETOLE: // flipped
+ case ISD::SETOGE:
+ case ISD::SETGE: return X86::COND_AE;
+ case ISD::SETUGT: // flipped
+ case ISD::SETULT:
+ case ISD::SETLT: return X86::COND_B;
+ case ISD::SETUGE: // flipped
+ case ISD::SETULE:
+ case ISD::SETLE: return X86::COND_BE;
+ case ISD::SETONE:
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETUO: return X86::COND_P;
+ case ISD::SETO: return X86::COND_NP;
+ }
+}
+
+/// hasFPCMov - is there a floating point cmov for the specific X86 condition
+/// code. The current x86 ISA includes the following FP cmov instructions:
+/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
+static bool hasFPCMov(unsigned X86CC) {
+ switch (X86CC) {
+ default:
+ return false;
+ case X86::COND_B:
+ case X86::COND_BE:
+ case X86::COND_E:
+ case X86::COND_P:
+ case X86::COND_A:
+ case X86::COND_AE:
+ case X86::COND_NE:
+ case X86::COND_NP:
+ return true;
+ }
+}
+
+/// isUndefOrInRange - Return true if Val is undef or if its value falls within
+/// the specified range [Low, Hi).
+static bool isUndefOrInRange(int Val, int Low, int Hi) {
+ return (Val < 0) || (Val >= Low && Val < Hi);
+}
+
+/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
+/// specified value.
+static bool isUndefOrEqual(int Val, int CmpVal) {
+ if (Val < 0 || Val == CmpVal)
+ return true;
+ return false;
+}
+
+/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
+/// the second operand.
+static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
+ return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return (Mask[0] < 2 && Mask[1] < 2);
+ return false;
+}
+
+bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFDMask(M, N->getValueType(0));
+}
+
+/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFHW.
+static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ if (VT != MVT::v8i16)
+ return false;
+
+ // Lower quadword copied in order or undef.
+ for (int i = 0; i != 4; ++i)
+ if (Mask[i] >= 0 && Mask[i] != i)
+ return false;
+
+ // Upper quadword shuffled.
+ for (int i = 4; i != 8; ++i)
+ if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
+ return false;
+
+ return true;
+}
+
+bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFHWMask(M, N->getValueType(0));
+}
+
+/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
+/// is suitable for input to PSHUFLW.
+static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ if (VT != MVT::v8i16)
+ return false;
+
+ // Upper quadword copied in order.
+ for (int i = 4; i != 8; ++i)
+ if (Mask[i] >= 0 && Mask[i] != i)
+ return false;
+
+ // Lower quadword shuffled.
+ for (int i = 0; i != 4; ++i)
+ if (Mask[i] >= 4)
+ return false;
+
+ return true;
+}
+
+bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isPSHUFLWMask(M, N->getValueType(0));
+}
+
+/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to SHUFP*.
+static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ int Half = NumElems / 2;
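+  // SHUFP takes the low half of the result from the first operand and the
+  // high half from the second operand.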
+ for (int i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ return false;
+ for (int i = Half; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ return false;
+
+ return true;
+}
+
+bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isSHUFPMask(M, N->getValueType(0));
+}
+
+/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
+/// the reverse of what x86 shuffles want. x86 shuffles require the lower
+/// half elements to come from vector 1 (which would equal the dest.) and
+/// the upper half to come from vector 2.
+static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ int NumElems = VT.getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ int Half = NumElems / 2;
+ for (int i = 0; i < Half; ++i)
+ if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ return false;
+ for (int i = Half; i < NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ return false;
+ return true;
+}
+
+static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return isCommutedSHUFPMask(M, N->getValueType(0));
+}
+
+/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
+ if (N->getValueType(0).getVectorNumElements() != 4)
+ return false;
+
+ // Expect elt 0 == 6, elt 1 == 7, elt 2 == 2, elt 3 == 3
+ return isUndefOrEqual(N->getMaskElt(0), 6) &&
+ isUndefOrEqual(N->getMaskElt(1), 7) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
+}
+
+/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
+bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i + NumElems))
+ return false;
+
+ for (unsigned i = NumElems/2; i < NumElems; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+
+ return true;
+}
+
+/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
+/// and MOVLHPS.
+bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+
+ for (unsigned i = 0; i < NumElems/2; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i + NumElems/2), i + NumElems))
+ return false;
+
+ return true;
+}
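+
+// For a 4-element type these accept the masks <4, 5, 2, 3> (isMOVLPMask) and
+// <0, 1, 4, 5> (isMOVHPMask / MOVLHPS), with undef allowed in any position.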
+
+/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+/// <2, 3, 2, 3>
+bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 4)
+ return false;
+
+ return isUndefOrEqual(N->getMaskElt(0), 2) &&
+ isUndefOrEqual(N->getMaskElt(1), 3) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
+}
+
+/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKL.
+static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, MVT VT,
+ bool V2IsSplat = false) {
+ int NumElts = VT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return false;
+
+ for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts))
+ return false;
+ }
+ }
+ return true;
+}
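+
+// For a 4-element type this accepts the interleaving mask <0, 4, 1, 5> (with
+// undefs allowed in any position), or <0, 4, 1, 4> when V2IsSplat is set,
+// since a splatted V2 only needs its first element.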
+
+bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat);
+}
+
+/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKH.
+static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, MVT VT,
+ bool V2IsSplat = false) {
+ int NumElts = VT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return false;
+
+ for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j + NumElts/2))
+ return false;
+ if (V2IsSplat) {
+ if (isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
+ return false;
+ }
+ }
+ return true;
+}
+
+bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat);
+}
+
+/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+/// <0, 0, 1, 1>
+static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+ return false;
+
+ for (int i = 0, j = 0; i != NumElems; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+ return true;
+}
+
+bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKL_v_undef_Mask(M, N->getValueType(0));
+}
+
+/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+/// <2, 2, 3, 3>
+static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ int NumElems = VT.getVectorNumElements();
+ if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+ return false;
+
+ for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
+ return true;
+}
+
+bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isUNPCKH_v_undef_Mask(M, N->getValueType(0));
+}
+
+/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSS,
+/// MOVSD, and MOVD, i.e. setting the lowest element.
+static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+ int NumElts = VT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4)
+ return false;
+
+ if (!isUndefOrEqual(Mask[0], NumElts))
+ return false;
+
+ for (int i = 1; i < NumElts; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
+ return false;
+
+ return true;
+}
+
+bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return ::isMOVLMask(M, N->getValueType(0));
+}
+
+/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
+/// of what x86 movss wants: the lowest element must be the lowest element of
+/// vector 2, and the remaining elements must come from vector 1 in order.
+static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT,
+ bool V2IsSplat = false, bool V2IsUndef = false) {
+ int NumOps = VT.getVectorNumElements();
+ if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
+ return false;
+
+ if (!isUndefOrEqual(Mask[0], 0))
+ return false;
+
+ for (int i = 1; i < NumOps; ++i)
+ if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
+ (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
+ (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
+ return false;
+
+ return true;
+}
+
+static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
+ bool V2IsUndef = false) {
+ SmallVector<int, 8> M;
+ N->getMask(M);
+ return isCommutedMOVLMask(M, N->getValueType(0), V2IsSplat, V2IsUndef);
+}
+
+/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) {
+ if (N->getValueType(0).getVectorNumElements() != 4)
+ return false;
+
+ // Expect 1, 1, 3, 3
+ for (unsigned i = 0; i < 2; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt >= 0 && Elt != 1)
+ return false;
+ }
+
+ bool HasHi = false;
+ for (unsigned i = 2; i < 4; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt >= 0 && Elt != 3)
+ return false;
+ if (Elt == 3)
+ HasHi = true;
+ }
+ // Don't use movshdup if it can be done with a shufps.
+ // FIXME: verify that matching u, u, 3, 3 is what we want.
+ return HasHi;
+}
+
+/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) {
+ if (N->getValueType(0).getVectorNumElements() != 4)
+ return false;
+
+ // Expect 0, 0, 2, 2
+ for (unsigned i = 0; i < 2; ++i)
+ if (N->getMaskElt(i) > 0)
+ return false;
+
+ bool HasHi = false;
+ for (unsigned i = 2; i < 4; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt >= 0 && Elt != 2)
+ return false;
+ if (Elt == 2)
+ HasHi = true;
+ }
+ // Don't use movsldup if it can be done with a shufps.
+ return HasHi;
+}
+
+/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
+ int e = N->getValueType(0).getVectorNumElements() / 2;
+
+ for (int i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(i), i))
+ return false;
+ for (int i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getMaskElt(e+i), i))
+ return false;
+ return true;
+}
+
+/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PSHUF* and SHUFP*
+/// instructions.
+unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ int NumOperands = SVOp->getValueType(0).getVectorNumElements();
+
+ unsigned Shift = (NumOperands == 4) ? 2 : 1;
+ unsigned Mask = 0;
+ for (int i = 0; i < NumOperands; ++i) {
+ int Val = SVOp->getMaskElt(NumOperands-i-1);
+ if (Val < 0) Val = 0;
+ if (Val >= NumOperands) Val -= NumOperands;
+ Mask |= Val;
+ if (i != NumOperands - 1)
+ Mask <<= Shift;
+ }
+ return Mask;
+}
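+
+// For example, a v4i32 mask <3, 1, 2, 0> encodes as
+//   imm = (Mask[3] << 6) | (Mask[2] << 4) | (Mask[1] << 2) | Mask[0]
+//       = (0 << 6) | (2 << 4) | (1 << 2) | 3 = 0x27,
+// the usual two-bits-per-element PSHUFD/SHUFPS immediate.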
+
+/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PSHUFHW
+/// instruction.
+unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ unsigned Mask = 0;
+ // 8 nodes, but we only care about the last 4.
+ for (unsigned i = 7; i >= 4; --i) {
+ int Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ Mask |= (Val - 4);
+ if (i != 4)
+ Mask <<= 2;
+ }
+ return Mask;
+}
+
+/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_SHUFFLE mask with the PSHUFLW
+/// instruction.
+unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ unsigned Mask = 0;
+ // 8 nodes, but we only care about the first 4.
+ for (int i = 3; i >= 0; --i) {
+ int Val = SVOp->getMaskElt(i);
+ if (Val >= 0)
+ Mask |= Val;
+ if (i != 0)
+ Mask <<= 2;
+ }
+ return Mask;
+}
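+
+// These pack two bits per element just like getShuffleSHUFImmediate; e.g. a
+// v8i16 mask whose low four entries are <2, 0, 3, 1> gives a PSHUFLW immediate
+// of (1 << 6) | (3 << 4) | (0 << 2) | 2 = 0x72, and the PSHUFHW immediate is
+// formed the same way from elements 4-7 after subtracting 4.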
+
+/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
+/// their permute mask.
+static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG) {
+ MVT VT = SVOp->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> MaskVec;
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = SVOp->getMaskElt(i);
+ if (idx < 0)
+ MaskVec.push_back(idx);
+ else if (idx < (int)NumElems)
+ MaskVec.push_back(idx + NumElems);
+ else
+ MaskVec.push_back(idx - NumElems);
+ }
+ return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1),
+ SVOp->getOperand(0), &MaskVec[0]);
+}
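+
+// For example, commuting a v4i32 shuffle (V1, V2, <0, 5, 2, 7>) produces
+// (V2, V1, <4, 1, 6, 3>): indices that referred to V1 now refer to the second
+// operand and vice versa, so the same elements are selected.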
+
+/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
+/// the two vector operands have swapped position.
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, MVT VT) {
+ unsigned NumElems = VT.getVectorNumElements();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElems)
+ Mask[i] = idx + NumElems;
+ else
+ Mask[i] = idx - NumElems;
+ }
+}
+
+/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
+/// match movhlps. The lower half elements should come from upper half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order).
+static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
+ if (Op->getValueType(0).getVectorNumElements() != 4)
+ return false;
+ for (unsigned i = 0, e = 2; i != e; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
+ return false;
+ for (unsigned i = 2; i != 4; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+4))
+ return false;
+ return true;
+}
+
+/// isScalarLoadToVector - Returns true if the node is a scalar load that
+/// is promoted to a vector. It also returns the LoadSDNode by reference if
+/// required.
+static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
+ if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+ N = N->getOperand(0).getNode();
+ if (!ISD::isNON_EXTLoad(N))
+ return false;
+ if (LD)
+ *LD = cast<LoadSDNode>(N);
+ return true;
+}
+
+/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
+/// match movlp{s|d}. The lower half elements should come from lower half of
+/// V1 (and in order), and the upper half elements should come from the upper
+/// half of V2 (and in order). And since V1 will become the source of the
+/// MOVLP, it must be either a vector load or a scalar load to vector.
+static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
+ ShuffleVectorSDNode *Op) {
+ if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
+ return false;
+ // If V2 is a vector load, don't do this transformation. We will try to use
+ // a load-folding shufps op instead.
+ if (ISD::isNON_EXTLoad(V2))
+ return false;
+
+ unsigned NumElems = Op->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 2 && NumElems != 4)
+ return false;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i))
+ return false;
+ for (unsigned i = NumElems/2; i != NumElems; ++i)
+ if (!isUndefOrEqual(Op->getMaskElt(i), i+NumElems))
+ return false;
+ return true;
+}
+
+/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
+/// all the same.
+static bool isSplatVector(SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ SDValue SplatValue = N->getOperand(0);
+ for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i) != SplatValue)
+ return false;
+ return true;
+}
+
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+static inline bool isZeroNode(SDValue Elt) {
+ return ((isa<ConstantSDNode>(Elt) &&
+ cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
+ (isa<ConstantFPSDNode>(Elt) &&
+ cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+}
+
+/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
+/// to a zero vector.
+/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
+static bool isZeroShuffle(ShuffleVectorSDNode *N) {
+ SDValue V1 = N->getOperand(0);
+ SDValue V2 = N->getOperand(1);
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx >= (int)NumElems) {
+ unsigned Opc = V2.getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems)))
+ return false;
+ } else if (Idx >= 0) {
+ unsigned Opc = V1.getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx)))
+ return false;
+ }
+ }
+ return true;
+}
+
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+///
+static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG,
+ DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+
+ // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+ // type. This ensures they get CSE'd.
+ SDValue Vec;
+ if (VT.getSizeInBits() == 64) { // MMX
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
+ } else if (HasSSE2) { // SSE2
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ } else { // SSE1
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+}
+
+/// getOnesVector - Returns a vector of specified type with all bits set.
+///
+static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ assert(VT.isVector() && "Expected a vector type");
+
+ // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+ // type. This ensures they get CSE'd.
+ SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+ SDValue Vec;
+ if (VT.getSizeInBits() == 64) // MMX
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
+ else // SSE
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+}
+
+
+/// NormalizeMask - V2 is a splat; modify the mask (if needed) so all elements
+/// that point to V2 point to its first element.
+static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ MVT VT = SVOp->getValueType(0);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ bool Changed = false;
+ SmallVector<int, 8> MaskVec;
+ SVOp->getMask(MaskVec);
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (MaskVec[i] > (int)NumElems) {
+ MaskVec[i] = NumElems;
+ Changed = true;
+ }
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0),
+ SVOp->getOperand(1), &MaskVec[0]);
+ return SDValue(SVOp, 0);
+}
+
+/// getMOVL - Returns a vector_shuffle node for a movs{s|d}, movd
+/// operation of the specified width.
+static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i)
+ Mask.push_back(i);
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
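+
+// For a 4-element type this builds the mask <4, 1, 2, 3>: the low element is
+// taken from V2 and the remaining elements are kept from V1, matching the
+// movss/movsd/movd pattern.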
+
+/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
+static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
+ Mask.push_back(i);
+ Mask.push_back(i + NumElems);
+ }
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
+/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
+static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned Half = NumElems/2;
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != Half; ++i) {
+ Mask.push_back(i + Half);
+ Mask.push_back(i + NumElems + Half);
+ }
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
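+
+// For a 4-element type, getUnpackl builds the mask <0, 4, 1, 5> and getUnpackh
+// builds <2, 6, 3, 7>, i.e. the low-half and high-half interleavings performed
+// by the unpckl*/unpckh* instructions.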
+
+/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4f32.
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
+ bool HasSSE2) {
+ if (SV->getValueType(0).getVectorNumElements() <= 4)
+ return SDValue(SV, 0);
+
+ MVT PVT = MVT::v4f32;
+ MVT VT = SV->getValueType(0);
+ DebugLoc dl = SV->getDebugLoc();
+ SDValue V1 = SV->getOperand(0);
+ int NumElems = VT.getVectorNumElements();
+ int EltNo = SV->getSplatIndex();
+
+ // unpack elements to the correct location
+ while (NumElems > 4) {
+ if (EltNo < NumElems/2) {
+ V1 = getUnpackl(DAG, dl, VT, V1, V1);
+ } else {
+ V1 = getUnpackh(DAG, dl, VT, V1, V1);
+ EltNo -= NumElems/2;
+ }
+ NumElems >>= 1;
+ }
+
+ // Perform the splat.
+ int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1);
+ V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1);
+}
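+
+// For example, splatting element 5 of a v8i16: one getUnpackh pass duplicates
+// element 5 into both halves of 32-bit lane 1 of the result, and the v4f32
+// shuffle <1, 1, 1, 1> then broadcasts that lane, so the bitcast back to
+// v8i16 holds eight copies of the original word.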
+
+/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
+/// vector and a zero or undef vector. This produces a shuffle where the low
+/// element of V2 is swizzled into the zero/undef vector, landing at element
+/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
+static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
+ bool isZero, bool HasSSE2,
+ SelectionDAG &DAG) {
+ MVT VT = V2.getValueType();
+ SDValue V1 = isZero
+ ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 16> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i)
+ // If this is the insertion idx, put the low elt of V2 here.
+ MaskVec.push_back(i == Idx ? NumElems : i);
+ return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
+}
+
+/// getNumOfConsecutiveZeros - Return the number of consecutive zero elements
+/// at the low (Low == true) or high end of a shuffle result.
+static
+unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
+ bool Low, SelectionDAG &DAG) {
+ unsigned NumZeros = 0;
+ for (int i = 0; i < NumElems; ++i) {
+ unsigned Index = Low ? i : NumElems-i-1;
+ int Idx = SVOp->getMaskElt(Index);
+ if (Idx < 0) {
+ ++NumZeros;
+ continue;
+ }
+ SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
+ if (Elt.getNode() && isZeroNode(Elt))
+ ++NumZeros;
+ else
+ break;
+ }
+ return NumZeros;
+}
+
+/// isVectorShift - Returns true if the shuffle can be implemented as a
+/// logical left or right shift of a vector.
+/// FIXME: split into pslldqi, psrldqi, palignr variants.
+static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ int NumElems = SVOp->getValueType(0).getVectorNumElements();
+
+ isLeft = true;
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG);
+ if (!NumZeros) {
+ isLeft = false;
+ NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, false, DAG);
+ if (!NumZeros)
+ return false;
+ }
+ bool SeenV1 = false;
+ bool SeenV2 = false;
+ for (int i = NumZeros; i < NumElems; ++i) {
+ int Val = isLeft ? (i - NumZeros) : i;
+ int Idx = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
+ if (Idx < 0)
+ continue;
+ if (Idx < NumElems)
+ SeenV1 = true;
+ else {
+ Idx -= NumElems;
+ SeenV2 = true;
+ }
+ if (Idx != Val)
+ return false;
+ }
+ if (SeenV1 && SeenV2)
+ return false;
+
+ ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1);
+ ShAmt = NumZeros;
+ return true;
+}
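+
+// For example, a v4i32 shuffle whose first two result elements are known to be
+// zero and whose last two are elements 0 and 1 of V1 is recognized as V1
+// shifted left by two elements (ShAmt = 2); the caller scales this by the
+// element size and emits a whole-vector logical shift via getVShift.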
+
+
+/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
+///
+static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG, TargetLowering &TLI) {
+ if (NumNonZero > 8)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 16; ++i) {
+ bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
+ if (ThisIsNonZero && First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v8i16);
+ First = false;
+ }
+
+ if ((i & 1) != 0) {
+ SDValue ThisElt(0, 0), LastElt(0, 0);
+ bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
+ if (LastIsNonZero) {
+ LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
+ MVT::i16, Op.getOperand(i-1));
+ }
+ if (ThisIsNonZero) {
+ ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
+ ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
+ ThisElt, DAG.getConstant(8, MVT::i8));
+ if (LastIsNonZero)
+ ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
+ } else
+ ThisElt = LastElt;
+
+ if (ThisElt.getNode())
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
+ DAG.getIntPtrConstant(i/2));
+ }
+ }
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V);
+}
+
+/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
+///
+static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG, TargetLowering &TLI) {
+ if (NumNonZero > 4)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue V(0, 0);
+ bool First = true;
+ for (unsigned i = 0; i < 8; ++i) {
+ bool isNonZero = (NonZeros & (1 << i)) != 0;
+ if (isNonZero) {
+ if (First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v8i16);
+ First = false;
+ }
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ MVT::v8i16, V, Op.getOperand(i),
+ DAG.getIntPtrConstant(i));
+ }
+ }
+
+ return V;
+}
+
+/// getVShift - Return a vector logical shift node.
+///
+static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp,
+ unsigned NumBits, SelectionDAG &DAG,
+ const TargetLowering &TLI, DebugLoc dl) {
+ bool isMMX = VT.getSizeInBits() == 64;
+ MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
+ unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
+ SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getNode(Opc, dl, ShVT, SrcOp,
+ DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
+}
+
+SDValue
+X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // All zeros are handled with pxor, all ones are handled with pcmpeqd.
+ if (ISD::isBuildVectorAllZeros(Op.getNode())
+ || ISD::isBuildVectorAllOnes(Op.getNode())) {
+ // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
+ // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
+ // eliminated on x86-32 hosts.
+ if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
+ return Op;
+
+ if (ISD::isBuildVectorAllOnes(Op.getNode()))
+ return getOnesVector(Op.getValueType(), DAG, dl);
+ return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
+ }
+
+ MVT VT = Op.getValueType();
+ MVT EVT = VT.getVectorElementType();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ unsigned NumElems = Op.getNumOperands();
+ unsigned NumZero = 0;
+ unsigned NumNonZero = 0;
+ unsigned NonZeros = 0;
+ bool IsAllConstants = true;
+ SmallSet<SDValue, 8> Values;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ Values.insert(Elt);
+ if (Elt.getOpcode() != ISD::Constant &&
+ Elt.getOpcode() != ISD::ConstantFP)
+ IsAllConstants = false;
+ if (isZeroNode(Elt))
+ NumZero++;
+ else {
+ NonZeros |= (1 << i);
+ NumNonZero++;
+ }
+ }
+
+ if (NumNonZero == 0) {
+ // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ return DAG.getUNDEF(VT);
+ }
+
+ // Special case for single non-zero, non-undef, element.
+ if (NumNonZero == 1 && NumElems <= 4) {
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+
+ // If this is an insertion of an i64 value on x86-32, and if the top bits of
+ // the value are obviously zero, truncate the value to i32 and do the
+ // insertion that way. Only do this if the value is non-constant or if the
+ // value is a constant being inserted into element 0. It is cheaper to do
+ // a constant pool load than it is to do a movd + shuffle.
+ if (EVT == MVT::i64 && !Subtarget->is64Bit() &&
+ (!IsAllConstants || Idx == 0)) {
+ if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
+ // Handle MMX and SSE both.
+ MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
+ unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
+
+ // Truncate the value (which may itself be a constant) to i32, and
+ // convert it to a vector with movd (S2V+shuffle to zero extend).
+ Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+ Subtarget->hasSSE2(), DAG);
+
+ // Now we have our 32-bit value zero extended in the low element of
+ // a vector. If Idx != 0, swizzle it into place.
+ if (Idx != 0) {
+ SmallVector<int, 4> Mask;
+ Mask.push_back(Idx);
+ for (unsigned i = 1; i != VecElts; ++i)
+ Mask.push_back(i);
+ Item = DAG.getVectorShuffle(VecVT, dl, Item,
+ DAG.getUNDEF(Item.getValueType()),
+ &Mask[0]);
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item);
+ }
+ }
+
+ // If we have a constant or non-constant insertion into the low element of
+ // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
+ // the rest of the elements. This will be matched as movd/movq/movss/movsd
+ // depending on what the source datatype is. Because we can only get here
+ // when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
+ if (Idx == 0 &&
+ // Don't do this for i64 values on x86-32.
+ (EVT != MVT::i64 || Subtarget->is64Bit())) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+ return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
+ Subtarget->hasSSE2(), DAG);
+ }
+
+ // Is it a vector logical left shift?
+ if (NumElems == 2 && Idx == 1 &&
+ isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
+ unsigned NumBits = VT.getSizeInBits();
+ return getVShift(true, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ VT, Op.getOperand(1)),
+ NumBits/2, DAG, *this, dl);
+ }
+
+ if (IsAllConstants) // Otherwise, it's better to do a constpool load.
+ return SDValue();
+
+ // Otherwise, if this is a vector with i32 or f32 elements, and the element
+ // is a non-constant being inserted into an element other than the low one,
+ // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
+ // movd/movss) to move this into the low element, then shuffle it into
+ // place.
+ if (EVTBits == 32) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+
+ // Turn it into a shuffle of zero and zero-extended scalar to vector.
+ Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
+ Subtarget->hasSSE2(), DAG);
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i++)
+ MaskVec.push_back(i == Idx ? 0 : 1);
+ return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
+ }
+ }
+
+ // Splat is obviously ok. Let legalizer expand it to a shuffle.
+ if (Values.size() == 1)
+ return SDValue();
+
+ // A vector full of immediates; various special cases are already
+ // handled, so this is best done with a single constant-pool load.
+ if (IsAllConstants)
+ return SDValue();
+
+ // Let legalizer expand 2-wide build_vectors.
+ if (EVTBits == 64) {
+ if (NumNonZero == 1) {
+ // One half is zero or undef.
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
+ Op.getOperand(Idx));
+ return getShuffleVectorZeroOrUndef(V2, Idx, true,
+ Subtarget->hasSSE2(), DAG);
+ }
+ return SDValue();
+ }
+
+ // If element VT is < 32 bits, convert it to inserts into a zero vector.
+ if (EVTBits == 8 && NumElems == 16) {
+ SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
+ if (V.getNode()) return V;
+ }
+
+ if (EVTBits == 16 && NumElems == 8) {
+ SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
+ *this);
+ if (V.getNode()) return V;
+ }
+
+ // If element VT is == 32 bits, turn it into a number of shuffles.
+ SmallVector<SDValue, 8> V;
+ V.resize(NumElems);
+ if (NumElems == 4 && NumZero > 0) {
+ for (unsigned i = 0; i < 4; ++i) {
+ bool isZero = !(NonZeros & (1 << i));
+ if (isZero)
+ V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+ else
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ }
+
+ for (unsigned i = 0; i < 2; ++i) {
+ switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
+ default: break;
+ case 0:
+ V[i] = V[i*2]; // Must be a zero vector.
+ break;
+ case 1:
+ V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]);
+ break;
+ case 2:
+ V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]);
+ break;
+ case 3:
+ V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]);
+ break;
+ }
+ }
+
+ SmallVector<int, 8> MaskVec;
+ bool Reverse = (NonZeros & 0x3) == 2;
+ for (unsigned i = 0; i < 2; ++i)
+ MaskVec.push_back(Reverse ? 1-i : i);
+ Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
+ for (unsigned i = 0; i < 2; ++i)
+ MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems);
+ return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
+ }
+
+ if (Values.size() > 2) {
+ // If we have SSE 4.1, expand into a number of inserts unless the number of
+ // values to be inserted is equal to the number of elements, in which case
+ // use the unpack code below in the hopes of matching the consecutive elts
+ // load merge pattern for shuffles.
+ // FIXME: We could probably just check that here directly.
+ if (Values.size() < NumElems && VT.getSizeInBits() == 128 &&
+ getSubtarget()->hasSSE41()) {
+ V[0] = DAG.getUNDEF(VT);
+ for (unsigned i = 0; i < NumElems; ++i)
+ if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0],
+ Op.getOperand(i), DAG.getIntPtrConstant(i));
+ return V[0];
+ }
+ // Expand into a number of unpckl*.
+ // e.g. for v4f32
+ // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
+ // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
+ // Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ NumElems >>= 1;
+ while (NumElems != 0) {
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]);
+ NumElems >>= 1;
+ }
+ return V[0];
+ }
+
+ return SDValue();
+}
+
+// v8i16 shuffles - Prefer shuffles in the following order:
+// 1. [all] pshuflw, pshufhw, optional move
+// 2. [ssse3] 1 x pshufb
+// 3. [ssse3] 2 x pshufb + 1 x por
+// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
+static
+SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG, X86TargetLowering &TLI) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 8> MaskVals;
+
+ // Determine if more than 1 of the words in each of the low and high quadwords
+ // of the result come from the same quadword of one of the two inputs. Undef
+ // mask values count as coming from any quadword, for better codegen.
+ SmallVector<unsigned, 4> LoQuad(4);
+ SmallVector<unsigned, 4> HiQuad(4);
+ BitVector InputQuads(4);
+ for (unsigned i = 0; i < 8; ++i) {
+ SmallVectorImpl<unsigned> &Quad = i < 4 ? LoQuad : HiQuad;
+ int EltIdx = SVOp->getMaskElt(i);
+ MaskVals.push_back(EltIdx);
+ if (EltIdx < 0) {
+ ++Quad[0];
+ ++Quad[1];
+ ++Quad[2];
+ ++Quad[3];
+ continue;
+ }
+ ++Quad[EltIdx / 4];
+ InputQuads.set(EltIdx / 4);
+ }
+
+ int BestLoQuad = -1;
+ unsigned MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (LoQuad[i] > MaxQuad) {
+ BestLoQuad = i;
+ MaxQuad = LoQuad[i];
+ }
+ }
+
+ int BestHiQuad = -1;
+ MaxQuad = 1;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (HiQuad[i] > MaxQuad) {
+ BestHiQuad = i;
+ MaxQuad = HiQuad[i];
+ }
+ }
+
+ // For SSSE3, if all 8 words of the result come from only 1 quadword of each
+ // of the two input vectors, shuffle them into one input vector so only a
+ // single pshufb instruction is necessary. If there are more than 2 input
+ // quads, disable the next transformation since it does not help SSSE3.
+ bool V1Used = InputQuads[0] || InputQuads[1];
+ bool V2Used = InputQuads[2] || InputQuads[3];
+ if (TLI.getSubtarget()->hasSSSE3()) {
+ if (InputQuads.count() == 2 && V1Used && V2Used) {
+ BestLoQuad = InputQuads.find_first();
+ BestHiQuad = InputQuads.find_next(BestLoQuad);
+ }
+ if (InputQuads.count() > 2) {
+ BestLoQuad = -1;
+ BestHiQuad = -1;
+ }
+ }
+
+ // If BestLoQuad or BestHiQuad are set, shuffle the quads together and update
+ // the shuffle mask. If a quad is scored as -1, that means that it contains
+ // words from all 4 input quadwords.
+ SDValue NewV;
+ if (BestLoQuad >= 0 || BestHiQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
+ MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
+ NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1),
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]);
+ NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV);
+
+ // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
+ // source words for the shuffle, to aid later transformations.
+ bool AllWordsInNewV = true;
+ bool InOrder[2] = { true, true };
+ for (unsigned i = 0; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx != (int)i)
+ InOrder[i/4] = false;
+ if (idx < 0 || (idx/4) == BestLoQuad || (idx/4) == BestHiQuad)
+ continue;
+ AllWordsInNewV = false;
+ break;
+ }
+
+ bool pshuflw = AllWordsInNewV, pshufhw = AllWordsInNewV;
+ if (AllWordsInNewV) {
+ for (int i = 0; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0)
+ continue;
+ idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
+ if ((idx != i) && idx < 4)
+ pshufhw = false;
+ if ((idx != i) && idx > 3)
+ pshuflw = false;
+ }
+ V1 = NewV;
+ V2Used = false;
+ BestLoQuad = 0;
+ BestHiQuad = 1;
+ }
+
+ // If we've eliminated the use of V2, and the new mask is a pshuflw or
+ // pshufhw, that's as cheap as it gets. Return the new shuffle.
+ if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
+ return DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
+ DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
+ }
+ }
+
+ // If we have SSSE3, and all words of the result are from 1 input vector,
+ // case 2 is generated, otherwise case 3 is generated. If no SSSE3
+ // is present, fall back to case 4.
+ if (TLI.getSubtarget()->hasSSSE3()) {
+ SmallVector<SDValue,16> pshufbMask;
+
+ // If we have elements from both input vectors, set the high bit of the
+ // shuffle mask element to zero out elements that come from V2 in the V1
+ // mask, and elements that come from V1 in the V2 mask, so that the two
+ // results can be OR'd together.
+ bool TwoInputs = V1Used && V2Used;
+ for (unsigned i = 0; i != 8; ++i) {
+ int EltIdx = MaskVals[i] * 2;
+ if (TwoInputs && (EltIdx >= 16)) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
+ }
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1);
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ if (!TwoInputs)
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+
+ // Calculate the shuffle mask for the second input, shuffle it, and
+ // OR it with the first shuffled input.
+ pshufbMask.clear();
+ for (unsigned i = 0; i != 8; ++i) {
+ int EltIdx = MaskVals[i] * 2;
+ if (EltIdx < 16) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
+ }
+ V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2);
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+ }
+
+ // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
+ // and update MaskVals with new element order.
+ BitVector InOrder(8);
+ if (BestLoQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ for (int i = 0; i != 4; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0) {
+ MaskV.push_back(-1);
+ InOrder.set(i);
+ } else if ((idx / 4) == BestLoQuad) {
+ MaskV.push_back(idx & 3);
+ InOrder.set(i);
+ } else {
+ MaskV.push_back(-1);
+ }
+ }
+ for (unsigned i = 4; i != 8; ++i)
+ MaskV.push_back(i);
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+ &MaskV[0]);
+ }
+
+ // If BestHiQuad >= 0, generate a pshufhw to put the high elements in order,
+ // and update MaskVals with the new element order.
+ if (BestHiQuad >= 0) {
+ SmallVector<int, 8> MaskV;
+ for (unsigned i = 0; i != 4; ++i)
+ MaskV.push_back(i);
+ for (unsigned i = 4; i != 8; ++i) {
+ int idx = MaskVals[i];
+ if (idx < 0) {
+ MaskV.push_back(-1);
+ InOrder.set(i);
+ } else if ((idx / 4) == BestHiQuad) {
+ MaskV.push_back((idx & 3) + 4);
+ InOrder.set(i);
+ } else {
+ MaskV.push_back(-1);
+ }
+ }
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
+ &MaskV[0]);
+ }
+
+ // In case BestHiQuad & BestLoQuad were both -1, which means each quadword has
+ // a word from each of the four input quadwords, calculate the InOrder
+ // bitvector now before falling through to the insert/extract cleanup.
+ if (BestLoQuad == -1 && BestHiQuad == -1) {
+ NewV = V1;
+ for (int i = 0; i != 8; ++i)
+ if (MaskVals[i] < 0 || MaskVals[i] == i)
+ InOrder.set(i);
+ }
+
+ // The other elements are put in the right place using pextrw and pinsrw.
+ for (unsigned i = 0; i != 8; ++i) {
+ if (InOrder[i])
+ continue;
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0)
+ continue;
+ SDValue ExtOp = (EltIdx < 8)
+ ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
+ DAG.getIntPtrConstant(EltIdx))
+ : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
+ DAG.getIntPtrConstant(EltIdx - 8));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp,
+ DAG.getIntPtrConstant(i));
+ }
+ return NewV;
+}
+
+// v16i8 shuffles - Prefer shuffles in the following order:
+// 1. [ssse3] 1 x pshufb
+// 2. [ssse3] 2 x pshufb + 1 x por
+// 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw
+static
+SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG, X86TargetLowering &TLI) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ SmallVector<int, 16> MaskVals;
+ SVOp->getMask(MaskVals);
+
+ // If we have SSSE3, case 1 is generated when all result bytes come from
+ // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
+ // present, fall back to case 3.
+ // FIXME: kill V2Only once shuffles are canonicalized by getNode.
+ bool V1Only = true;
+ bool V2Only = true;
+ for (unsigned i = 0; i < 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0)
+ continue;
+ if (EltIdx < 16)
+ V2Only = false;
+ else
+ V1Only = false;
+ }
+
+ // If SSSE3, use 1 pshufb instruction per vector with elements in the result.
+ if (TLI.getSubtarget()->hasSSSE3()) {
+ SmallVector<SDValue,16> pshufbMask;
+
+ // If all result elements are from one input vector, then only translate
+ // undef mask values to 0x80 (zero out result) in the pshufb mask.
+ //
+ // Otherwise, we have elements from both input vectors, and must zero out
+ // elements that come from V2 in the first mask, and V1 in the second mask
+ // so that we can OR them together.
+ bool TwoInputs = !(V1Only || V2Only);
+ for (unsigned i = 0; i != 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 0 || (TwoInputs && EltIdx >= 16)) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
+ }
+ // If all the elements are from V2, assign it to V1 and return after
+ // building the first pshufb.
+ if (V2Only)
+ V1 = V2;
+ V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ if (!TwoInputs)
+ return V1;
+
+ // Calculate the shuffle mask for the second input, shuffle it, and
+ // OR it with the first shuffled input.
+ pshufbMask.clear();
+ for (unsigned i = 0; i != 16; ++i) {
+ int EltIdx = MaskVals[i];
+ if (EltIdx < 16) {
+ pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
+ continue;
+ }
+ pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
+ }
+ V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v16i8, &pshufbMask[0], 16));
+ return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
+ }
+
+ // No SSSE3 - Calculate the in-place words and then fix all out-of-place words
+ // with 0-16 extracts & inserts. Worst case is 16 bytes out of order from
+ // the 16 different words that comprise the two doublequadword input vectors.
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V2);
+ SDValue NewV = V2Only ? V2 : V1;
+ for (int i = 0; i != 8; ++i) {
+ int Elt0 = MaskVals[i*2];
+ int Elt1 = MaskVals[i*2+1];
+
+ // This word of the result is all undef, skip it.
+ if (Elt0 < 0 && Elt1 < 0)
+ continue;
+
+ // This word of the result is already in the correct place, skip it.
+ if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1))
+ continue;
+ if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17))
+ continue;
+
+ SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
+ SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
+ SDValue InsElt;
+
+ // If Elt0 and Elt1 are defined and consecutive, and can be fetched together
+ // with a single extract, extract the word and insert it directly.
+ if ((Elt0 >= 0) && ((Elt0 + 1) == Elt1) && ((Elt0 & 1) == 0)) {
+ InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+ DAG.getIntPtrConstant(Elt1 / 2));
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+ DAG.getIntPtrConstant(i));
+ continue;
+ }
+
+ // If Elt1 is defined, extract it from the appropriate source. If the
+ // source byte is not also odd, shift the extracted word left 8 bits;
+ // otherwise clear the bottom 8 bits if we need to do an or.
+ if (Elt1 >= 0) {
+ InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
+ DAG.getIntPtrConstant(Elt1 / 2));
+ if ((Elt1 & 1) == 0)
+ InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
+ DAG.getConstant(8, TLI.getShiftAmountTy()));
+ else if (Elt0 >= 0)
+ InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
+ DAG.getConstant(0xFF00, MVT::i16));
+ }
+ // If Elt0 is defined, extract it from the appropriate source. If the
+ // source byte is not also even, shift the extracted word right 8 bits. If
+ // Elt1 was also defined, OR the extracted values together before
+ // inserting them in the result.
+ if (Elt0 >= 0) {
+ SDValue InsElt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
+ Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
+ if ((Elt0 & 1) != 0)
+ InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
+ DAG.getConstant(8, TLI.getShiftAmountTy()));
+ else if (Elt1 >= 0)
+ InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
+ DAG.getConstant(0x00FF, MVT::i16));
+ InsElt = Elt1 >= 0 ? DAG.getNode(ISD::OR, dl, MVT::i16, InsElt, InsElt0)
+ : InsElt0;
+ }
+ NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
+ DAG.getIntPtrConstant(i));
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, NewV);
+}
+
+/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
+/// done when every aligned pair / quad of shuffle mask elements points to
+/// consecutive elements from one input in the right sequence, e.g.
+/// vector_shuffle <>, <>, < 2, 3, | 10, 11, | 0, 1, | 14, 15>
+static
+SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
+ SelectionDAG &DAG,
+ TargetLowering &TLI, DebugLoc dl) {
+ MVT VT = SVOp->getValueType(0);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NewWidth = (NumElems == 4) ? 2 : 4;
+ MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+ MVT MaskEltVT = MaskVT.getVectorElementType();
+ MVT NewVT = MaskVT;
+ switch (VT.getSimpleVT()) {
+ default: assert(false && "Unexpected!");
+ case MVT::v4f32: NewVT = MVT::v2f64; break;
+ case MVT::v4i32: NewVT = MVT::v2i64; break;
+ case MVT::v8i16: NewVT = MVT::v4i32; break;
+ case MVT::v16i8: NewVT = MVT::v4i32; break;
+ }
+
+ if (NewWidth == 2) {
+ if (VT.isInteger())
+ NewVT = MVT::v2i64;
+ else
+ NewVT = MVT::v2f64;
+ }
+ int Scale = NumElems / NewWidth;
+ SmallVector<int, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i += Scale) {
+ int StartIdx = -1;
+ for (int j = 0; j < Scale; ++j) {
+ int EltIdx = SVOp->getMaskElt(i+j);
+ if (EltIdx < 0)
+ continue;
+ if (StartIdx == -1)
+ StartIdx = EltIdx - (EltIdx % Scale);
+ if (EltIdx != StartIdx + j)
+ return SDValue();
+ }
+ if (StartIdx == -1)
+ MaskVec.push_back(-1);
+ else
+ MaskVec.push_back(StartIdx / Scale);
+ }
+
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2);
+ return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
+}
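+
+// For example, a v4i32 shuffle with mask <2, 3, 4, 5> collapses to a v2i64
+// shuffle with mask <1, 2>: each aligned pair selects a consecutive, aligned
+// run from one input, so the shuffle can be done at twice the element width.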
+
+/// getVZextMovL - Return a zero-extending vector move low node.
+///
+static SDValue getVZextMovL(MVT VT, MVT OpVT,
+ SDValue SrcOp, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget, DebugLoc dl) {
+ if (VT == MVT::v2f64 || VT == MVT::v4f32) {
+ LoadSDNode *LD = NULL;
+ if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
+ LD = dyn_cast<LoadSDNode>(SrcOp);
+ if (!LD) {
+ // movssrr and movsdrr do not clear top bits. Try to use movd, movq
+ // instead.
+ MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
+ if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
+ SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
+ SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
+ // PR2108
+ OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ OpVT,
+ SrcOp.getOperand(0)
+ .getOperand(0))));
+ }
+ }
+ }
+
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
+ DAG.getNode(ISD::BIT_CONVERT, dl,
+ OpVT, SrcOp)));
+}
+
+/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
+/// shuffles.
+static SDValue
+LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ MVT VT = SVOp->getValueType(0);
+
+ SmallVector<std::pair<int, int>, 8> Locs;
+ Locs.resize(4);
+ SmallVector<int, 8> Mask1(4U, -1);
+ SmallVector<int, 8> PermMask;
+ SVOp->getMask(PermMask);
+
+ unsigned NumHi = 0;
+ unsigned NumLo = 0;
+ for (unsigned i = 0; i != 4; ++i) {
+ int Idx = PermMask[i];
+ if (Idx < 0) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else {
+ assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!");
+ if (Idx < 4) {
+ Locs[i] = std::make_pair(0, NumLo);
+ Mask1[NumLo] = Idx;
+ NumLo++;
+ } else {
+ Locs[i] = std::make_pair(1, NumHi);
+ if (2+NumHi < 4)
+ Mask1[2+NumHi] = Idx;
+ NumHi++;
+ }
+ }
+ }
+
+ if (NumLo <= 2 && NumHi <= 2) {
+ // No more than two elements come from either vector; this can be
+ // implemented with two shuffles. The first shuffle gathers the elements.
+ // The second shuffle, which takes the first shuffle as both of its
+ // vector operands, puts the elements into the right order.
+ V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+ SmallVector<int, 8> Mask2(4U, -1);
+
+ for (unsigned i = 0; i != 4; ++i) {
+ if (Locs[i].first == -1)
+ continue;
+ else {
+ unsigned Idx = (i < 2) ? 0 : 4;
+ Idx += Locs[i].first * 2 + Locs[i].second;
+ Mask2[i] = Idx;
+ }
+ }
+
+ return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
+ } else if (NumLo == 3 || NumHi == 3) {
+ // Otherwise, we must have three elements from one vector, call it X, and
+ // one element from the other, call it Y. First, use a shufps to build an
+ // intermediate vector with the one element from Y and the element from X
+ // that will be in the same half in the final destination (the indexes don't
+ // matter). Then, use a shufps to build the final vector, taking the half
+ // containing the element from Y from the intermediate, and the other half
+ // from X.
+ if (NumHi == 3) {
+ // Normalize it so the 3 elements come from V1.
+ CommuteVectorShuffleMask(PermMask, VT);
+ std::swap(V1, V2);
+ }
+
+ // Find the element from V2.
+ unsigned HiIndex;
+ for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
+ int Val = PermMask[HiIndex];
+ if (Val < 0)
+ continue;
+ if (Val >= 4)
+ break;
+ }
+
+ Mask1[0] = PermMask[HiIndex];
+ Mask1[1] = -1;
+ Mask1[2] = PermMask[HiIndex^1];
+ Mask1[3] = -1;
+ V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+
+ if (HiIndex >= 2) {
+ Mask1[0] = PermMask[0];
+ Mask1[1] = PermMask[1];
+ Mask1[2] = HiIndex & 1 ? 6 : 4;
+ Mask1[3] = HiIndex & 1 ? 4 : 6;
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
+ } else {
+ Mask1[0] = HiIndex & 1 ? 2 : 0;
+ Mask1[1] = HiIndex & 1 ? 0 : 2;
+ Mask1[2] = PermMask[2];
+ Mask1[3] = PermMask[3];
+ if (Mask1[2] >= 0)
+ Mask1[2] += 4;
+ if (Mask1[3] >= 0)
+ Mask1[3] += 4;
+ return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
+ }
+ }
+
+ // Break it into (shuffle shuffle_hi, shuffle_lo).
+ Locs.clear();
+ Locs.resize(4); // The loop below indexes Locs[0..3] again.
+ SmallVector<int,8> LoMask(4U, -1);
+ SmallVector<int,8> HiMask(4U, -1);
+
+ SmallVector<int,8> *MaskPtr = &LoMask;
+ unsigned MaskIdx = 0;
+ unsigned LoIdx = 0;
+ unsigned HiIdx = 2;
+ for (unsigned i = 0; i != 4; ++i) {
+ if (i == 2) {
+ MaskPtr = &HiMask;
+ MaskIdx = 1;
+ LoIdx = 0;
+ HiIdx = 2;
+ }
+ int Idx = PermMask[i];
+ if (Idx < 0) {
+ Locs[i] = std::make_pair(-1, -1);
+ } else if (Idx < 4) {
+ Locs[i] = std::make_pair(MaskIdx, LoIdx);
+ (*MaskPtr)[LoIdx] = Idx;
+ LoIdx++;
+ } else {
+ Locs[i] = std::make_pair(MaskIdx, HiIdx);
+ (*MaskPtr)[HiIdx] = Idx;
+ HiIdx++;
+ }
+ }
+
+ SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
+ SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
+ SmallVector<int, 8> MaskOps;
+ for (unsigned i = 0; i != 4; ++i) {
+ if (Locs[i].first == -1) {
+ MaskOps.push_back(-1);
+ } else {
+ unsigned Idx = Locs[i].first * 4 + Locs[i].second;
+ MaskOps.push_back(Idx);
+ }
+ }
+ return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
+}
+
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned NumElems = VT.getVectorNumElements();
+ bool isMMX = VT.getSizeInBits() == 64;
+ bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ bool V1IsSplat = false;
+ bool V2IsSplat = false;
+
+ if (isZeroShuffle(SVOp))
+ return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
+
+ // Promote splats to v4f32.
+ if (SVOp->isSplat()) {
+ if (isMMX || NumElems < 4)
+ return Op;
+ return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2());
+ }
+
+ // If the shuffle can be profitably rewritten as a narrower shuffle, then
+ // do it!
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ if (NewOp.getNode())
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ LowerVECTOR_SHUFFLE(NewOp, DAG));
+ } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+ // FIXME: Figure out a cleaner way to do this.
+ // Try to make use of movq to zero out the top part.
+ if (ISD::isBuildVectorAllZeros(V2.getNode())) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ if (NewOp.getNode()) {
+ if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
+ return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
+ DAG, Subtarget, dl);
+ }
+ } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
+ SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+ if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
+ return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
+ DAG, Subtarget, dl);
+ }
+ }
+
+ if (X86::isPSHUFDMask(SVOp))
+ return Op;
+
+ // Check if this can be converted into a logical shift.
+ bool isLeft = false;
+ unsigned ShAmt = 0;
+ SDValue ShVal;
+ bool isShift = getSubtarget()->hasSSE2() &&
+ isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ if (isShift && ShVal.hasOneUse()) {
+ // If the shifted value has multiple uses, it may be cheaper to use
+ // v_set0 + movlhps or movhlps, etc.
+ MVT EVT = VT.getVectorElementType();
+ ShAmt *= EVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+ }
+
+ if (X86::isMOVLMask(SVOp)) {
+ if (V1IsUndef)
+ return V2;
+ if (ISD::isBuildVectorAllZeros(V1.getNode()))
+ return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
+ if (!isMMX)
+ return Op;
+ }
+
+ // FIXME: fold these into legal mask.
+ if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
+ X86::isMOVSLDUPMask(SVOp) ||
+ X86::isMOVHLPSMask(SVOp) ||
+ X86::isMOVHPMask(SVOp) ||
+ X86::isMOVLPMask(SVOp)))
+ return Op;
+
+ if (ShouldXformToMOVHLPS(SVOp) ||
+ ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ if (isShift) {
+ // No better options. Use a vshl / vsrl.
+ MVT EVT = VT.getVectorElementType();
+ ShAmt *= EVT.getSizeInBits();
+ return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
+ }
+
+ bool Commuted = false;
+ // FIXME: This should also accept a bitcast of a splat? Be careful, not
+ // 1,1,1,1 -> v8i16 though.
+ V1IsSplat = isSplatVector(V1.getNode());
+ V2IsSplat = isSplatVector(V2.getNode());
+
+ // Canonicalize the splat or undef, if present, to be on the RHS.
+ if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
+ Op = CommuteVectorShuffle(SVOp, DAG);
+ SVOp = cast<ShuffleVectorSDNode>(Op);
+ V1 = SVOp->getOperand(0);
+ V2 = SVOp->getOperand(1);
+ std::swap(V1IsSplat, V2IsSplat);
+ std::swap(V1IsUndef, V2IsUndef);
+ Commuted = true;
+ }
+
+ if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
+ // Shuffling low element of v1 into undef, just return v1.
+ if (V2IsUndef)
+ return V1;
+ // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which
+ // the instruction selector will not match, so get a canonical MOVL with
+ // swapped operands to undo the commute.
+ return getMOVL(DAG, dl, VT, V2, V1);
+ }
+
+ if (X86::isUNPCKL_v_undef_Mask(SVOp) ||
+ X86::isUNPCKH_v_undef_Mask(SVOp) ||
+ X86::isUNPCKLMask(SVOp) ||
+ X86::isUNPCKHMask(SVOp))
+ return Op;
+
+ if (V2IsSplat) {
+    // Normalize the mask so all entries that point to V2 point to its first
+    // element, then try to match unpck{h|l} again. If a match is found, return
+    // a new vector_shuffle with the corrected mask.
+ SDValue NewMask = NormalizeMask(SVOp, DAG);
+ ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask);
+ if (NSVOp != SVOp) {
+ if (X86::isUNPCKLMask(NSVOp, true)) {
+ return NewMask;
+ } else if (X86::isUNPCKHMask(NSVOp, true)) {
+ return NewMask;
+ }
+ }
+ }
+
+ if (Commuted) {
+    // Commute it back and try unpck* again.
+ // FIXME: this seems wrong.
+ SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
+ ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
+ if (X86::isUNPCKL_v_undef_Mask(NewSVOp) ||
+ X86::isUNPCKH_v_undef_Mask(NewSVOp) ||
+ X86::isUNPCKLMask(NewSVOp) ||
+ X86::isUNPCKHMask(NewSVOp))
+ return NewOp;
+ }
+
+ // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.
+
+ // Normalize the node to match x86 shuffle ops if needed
+ if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+ return CommuteVectorShuffle(SVOp, DAG);
+
+  // If the shuffle mask is already legal for the target, return the node as-is.
+ SmallVector<int, 16> PermMask;
+ SVOp->getMask(PermMask);
+ if (isShuffleMaskLegal(PermMask, VT))
+ return Op;
+
+  // Handle v8i16 specifically since SSE can do word extraction and insertion.
+ if (VT == MVT::v8i16) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+ if (VT == MVT::v16i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
+  // Handle all 4-wide cases with a number of shuffles, except for MMX.
+ if (NumElems == 4 && !isMMX)
+ return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);
+
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ if (VT.getSizeInBits() == 8) {
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT.getSizeInBits() == 16) {
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ // If Idx is 0, it's cheaper to do a move instead of a pextrw.
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::v4i32,
+ Op.getOperand(0)),
+ Op.getOperand(1)));
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT == MVT::f32) {
+ // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
+ // the result back to FR32 register. It's only worth matching if the
+ // result has a single use which is a store or a bitcast to i32. And in
+ // the case of a store, it's not worth it if the index is a constant 0,
+ // because a MOVSSmr can be used instead, which is smaller and faster.
+ if (!Op.hasOneUse())
+ return SDValue();
+ SDNode *User = *Op.getNode()->use_begin();
+ if ((User->getOpcode() != ISD::STORE ||
+ (isa<ConstantSDNode>(Op.getOperand(1)) &&
+ cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
+ (User->getOpcode() != ISD::BIT_CONVERT ||
+ User->getValueType(0) != MVT::i32))
+ return SDValue();
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32,
+ Op.getOperand(0)),
+ Op.getOperand(1));
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Extract);
+ } else if (VT == MVT::i32) {
+ // ExtractPS works with constant index.
+ if (isa<ConstantSDNode>(Op.getOperand(1)))
+ return Op;
+ }
+ return SDValue();
+}
+
+
+SDValue
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+
+ if (Subtarget->hasSSE41()) {
+ SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
+ if (Res.getNode())
+ return Res;
+ }
+
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ // TODO: handle v16i8.
+ if (VT.getSizeInBits() == 16) {
+ SDValue Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, dl,
+ MVT::v4i32, Vec),
+ Op.getOperand(1)));
+    // Transform it so it matches pextrw, which produces a 32-bit result.
+ MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1);
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EVT,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EVT, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
+ } else if (VT.getSizeInBits() == 32) {
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return Op;
+
+ // SHUFPS the element to the lowest double word, then movss.
+ int Mask[4] = { Idx, -1, -1, -1 };
+ MVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ DAG.getUNDEF(VVT), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+ DAG.getIntPtrConstant(0));
+ } else if (VT.getSizeInBits() == 64) {
+ // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
+ // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
+ // to match extract_elt for f64.
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ if (Idx == 0)
+ return Op;
+
+ // UNPCKHPD the element to the lowest double word, then movsd.
+    // Note that if the lower 64 bits of the result of the UNPCKHPD are then
+    // stored to a f64mem, the whole operation is folded into a single MOVHPDmr.
+ int Mask[2] = { 1, -1 };
+ MVT VVT = Op.getOperand(0).getValueType();
+ SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
+ DAG.getUNDEF(VVT), Mask);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
+ DAG.getIntPtrConstant(0));
+ }
+
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
+ MVT VT = Op.getValueType();
+ MVT EVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) &&
+ isa<ConstantSDNode>(N2)) {
+ unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB
+ : X86ISD::PINSRW;
+    // Transform it so it matches pinsr{b,w}, which expects a GR32 as its
+    // second argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ return DAG.getNode(Opc, dl, VT, N0, N1, N2);
+ } else if (EVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
+ // Bits [7:6] of the constant are the source select. This will always be
+ // zero here. The DAG Combiner may combine an extract_elt index into these
+ // bits. For example (insert (extract, 3), 2) could be matched by putting
+ // the '3' into bits [7:6] of X86ISD::INSERTPS.
+ // Bits [5:4] of the constant are the destination select. This is the
+ // value of the incoming immediate.
+ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
+ // combine either bitwise AND or insert of float 0.0 to set these bits.
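+    // For example (illustrative), inserting into element 2 with a zero source
+    // select and an empty zero mask yields an immediate of (2 << 4) == 0x20.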
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
+ } else if (EVT == MVT::i32) {
+ // InsertPS works with constant index.
+ if (isa<ConstantSDNode>(N2))
+ return Op;
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ MVT EVT = VT.getVectorElementType();
+
+ if (Subtarget->hasSSE41())
+ return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
+
+ if (EVT == MVT::i8)
+ return SDValue();
+
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
+ SDValue N2 = Op.getOperand(2);
+
+ if (EVT.getSizeInBits() == 16) {
+    // Transform it so it matches pinsrw, which expects a 16-bit value in a
+    // GR32 as its second argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
+ return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
+ }
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ if (Op.getValueType() == MVT::v2f32)
+ return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i32,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
+ Op.getOperand(0))));
+
+ SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
+ MVT VT = MVT::v2i32;
+ switch (Op.getValueType().getSimpleVT()) {
+ default: break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ VT = MVT::v4i32;
+ break;
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(),
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt));
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
+// one of the above-mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form an addressing mode. These wrapped nodes will be selected
+// into MOV32ri.
+SDValue
+X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really any debug info here, should come from the parent
+ DebugLoc dl = CP->getDebugLoc();
+ SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
+ CP->getAlignment());
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->isPICStyleRIPRel()) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(),
+ getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
+ int64_t Offset,
+ SelectionDAG &DAG) const {
+ bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ bool ExtraLoadRequired =
+ Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false);
+
+ // Create the TargetGlobalAddress node, folding in the constant
+ // offset if it is legal.
+ SDValue Result;
+ if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
+ Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ Offset = 0;
+ } else
+ Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0);
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+
+ // With PIC, the address is actually $g + Offset.
+ if (IsPic && !Subtarget->isPICStyleRIPRel()) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
+ Result);
+ }
+
+  // For Darwin & Mingw32, external and weak symbols are indirect, so we want
+  // to load the value at address GV, not the value of GV itself. This means
+  // that the GlobalAddress must be in the base or index register of the
+  // address, not in the GV offset field. The platform check is inside the
+  // GVRequiresExtraLoad() call. The same applies to external symbols during
+  // PIC codegen.
+ if (ExtraLoadRequired)
+ Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+ PseudoSourceValue::getGOT(), 0);
+
+ // If there was a non-zero offset that we didn't fold, create an explicit
+ // addition for it.
+ if (Offset != 0)
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
+ DAG.getConstant(Offset, getPointerTy()));
+
+ return Result;
+}
+
+SDValue
+X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+ return LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
+}
+
+static SDValue
+GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
+ SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg) {
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
+ GA->getValueType(0),
+ GA->getOffset());
+ if (InFlag) {
+ SDValue Ops[] = { Chain, TGA, *InFlag };
+ Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
+ } else {
+ SDValue Ops[] = { Chain, TGA };
+ Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
+ }
+ SDValue Flag = Chain.getValue(1);
+ return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
+static SDValue
+LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const MVT PtrVT) {
+ SDValue InFlag;
+ DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better
+ SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(),
+ PtrVT), InFlag);
+ InFlag = Chain.getValue(1);
+
+ return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
+static SDValue
+LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const MVT PtrVT) {
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX);
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
+// "local exec" model.
+static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ const MVT PtrVT, TLSModel::Model model,
+ bool is64Bit) {
+ DebugLoc dl = GA->getDebugLoc();
+ // Get the Thread Pointer
+ SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress,
+ DebugLoc::getUnknownLoc(), PtrVT,
+ DAG.getRegister(is64Bit? X86::FS : X86::GS,
+ MVT::i32));
+
+ SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base,
+ NULL, 0);
+
+ // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
+ // exec)
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
+ GA->getValueType(0),
+ GA->getOffset());
+ SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
+
+ if (model == TLSModel::InitialExec)
+ Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
+ PseudoSourceValue::getGOT(), 0);
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
+ // TODO: implement the "local dynamic" model
+  // TODO: implement the "initial exec" model for PIC executables
+ assert(Subtarget->isTargetELF() &&
+ "TLS not implemented for non-ELF targets");
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ GlobalValue *GV = GA->getGlobal();
+ TLSModel::Model model =
+ getTLSModel (GV, getTargetMachine().getRelocationModel());
+ if (Subtarget->is64Bit()) {
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ case TLSModel::LocalDynamic: // not implemented
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
+
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, true);
+ }
+ } else {
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ case TLSModel::LocalDynamic: // not implemented
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, false);
+ }
+ }
+ assert(0 && "Unreachable");
+ return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
+ // FIXME there isn't really any debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->isPICStyleRIPRel()) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(),
+ getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  // FIXME there isn't really any debug info here
+ DebugLoc dl = JT->getDebugLoc();
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
+ Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+ // With PIC, the address is actually $g + Offset.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ !Subtarget->isPICStyleRIPRel()) {
+ Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc::getUnknownLoc(),
+ getPointerTy()),
+ Result);
+ }
+
+ return Result;
+}
+
+/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
+/// take a 2 x i32 value to shift plus a shift amount.
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ MVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ SDValue Tmp1 = isSRA ?
+ DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, MVT::i8)) :
+ DAG.getConstant(0, VT);
+
+ SDValue Tmp2, Tmp3;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ } else {
+ Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, ShAmt);
+ }
+
+ SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
+ DAG.getConstant(VTBits, MVT::i8));
+ SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT,
+ AndNode, DAG.getConstant(0, MVT::i8));
+
+ SDValue Hi, Lo;
+ SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
+ SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
+
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ } else {
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ }
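+  // For example (a sketch of the intent), a 64-bit left shift on i386 becomes:
+  //   if (amt & 32) { hi = lo << (amt & 31); lo = 0; }
+  //   else          { hi = shld(hi, lo, amt); lo = lo << amt; }
+  // with the selection performed by the CMOVs above on the (amt & 32) test.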
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ MVT SrcVT = Op.getOperand(0).getValueType();
+ assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
+ "Unknown SINT_TO_FP to lower!");
+
+ // These are really Legal; return the operand so the caller accepts it as
+ // Legal.
+ if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
+ return Op;
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
+ Subtarget->is64Bit()) {
+ return Op;
+ }
+
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Size = SrcVT.getSizeInBits()/8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot,
+ PseudoSourceValue::getFixedStack(SSFI), 0);
+ return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+}
+
+SDValue X86TargetLowering::BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain,
+ SDValue StackSlot,
+ SelectionDAG &DAG) {
+ // Build the FILD
+ DebugLoc dl = Op.getDebugLoc();
+ SDVTList Tys;
+ bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
+ if (useSSE)
+ Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
+ else
+ Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(StackSlot);
+ Ops.push_back(DAG.getValueType(SrcVT));
+ SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, dl,
+ Tys, &Ops[0], Ops.size());
+
+ if (useSSE) {
+ Chain = Result.getValue(1);
+ SDValue InFlag = Result.getValue(2);
+
+ // FIXME: Currently the FST is flagged to the FILD_FLAG. This
+ // shouldn't be necessary except that RFP cannot be live across
+ // multiple blocks. When stackifier is fixed, they can be uncoupled.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ Tys = DAG.getVTList(MVT::Other);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Result);
+ Ops.push_back(StackSlot);
+ Ops.push_back(DAG.getValueType(Op.getValueType()));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(X86ISD::FST, dl, Tys, &Ops[0], Ops.size());
+ Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot,
+ PseudoSourceValue::getFixedStack(SSFI), 0);
+ }
+
+ return Result;
+}
+
+// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
+ // This algorithm is not obvious. Here it is in C code, more or less:
+ /*
+ double uint64_to_double( uint32_t hi, uint32_t lo ) {
+ static const __m128i exp = { 0x4330000045300000ULL, 0 };
+ static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
+
+ // Copy ints to xmm registers.
+ __m128i xh = _mm_cvtsi32_si128( hi );
+ __m128i xl = _mm_cvtsi32_si128( lo );
+
+ // Combine into low half of a single xmm register.
+ __m128i x = _mm_unpacklo_epi32( xh, xl );
+ __m128d d;
+ double sd;
+
+ // Merge in appropriate exponents to give the integer bits the right
+ // magnitude.
+ x = _mm_unpacklo_epi32( x, exp );
+
+ // Subtract away the biases to deal with the IEEE-754 double precision
+ // implicit 1.
+ d = _mm_sub_pd( (__m128d) x, bias );
+
+ // All conversions up to here are exact. The correctly rounded result is
+ // calculated using the current rounding mode using the following
+ // horizontal add.
+ d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
+ _mm_store_sd( &sd, d ); // Because we are returning doubles in XMM, this
+ // store doesn't really need to be here (except
+ // maybe to zero the other double)
+ return sd;
+ }
+ */
+
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Build some magic constants.
+ std::vector<Constant*> CV0;
+ CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
+ CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
+ CV0.push_back(ConstantInt::get(APInt(32, 0)));
+ CV0.push_back(ConstantInt::get(APInt(32, 0)));
+ Constant *C0 = ConstantVector::get(CV0);
+ SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
+
+ std::vector<Constant*> CV1;
+ CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
+ CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
+ Constant *C1 = ConstantVector::get(CV1);
+ SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
+
+ SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(1)));
+ SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0)));
+ SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
+ SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
+ SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
+ SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2);
+ SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
+
+ // Add the halves; easiest way is to swap them into another reg first.
+ int ShufMask[2] = { 1, -1 };
+ SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub,
+ DAG.getUNDEF(MVT::v2f64), ShufMask);
+ SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add,
+ DAG.getIntPtrConstant(0));
+}
+
+// LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion.
+SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // FP constant to bias correct the final result.
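+  // In C terms the trick below is roughly (a sketch, not the exact DAG):
+  //   uint64_t bits = 0x4330000000000000ULL | (uint64_t)x; // == bits of 2^52 + x
+  //   double   d    = bit_cast<double>(bits) - 0x1.0p52;   // == (double)x
+  // ORing the 32-bit value into the mantissa of 2^52 is exact, and subtracting
+  // the bias recovers the value.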
+ SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
+ MVT::f64);
+
+ // Load the 32-bit value into an XMM register.
+ SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0)));
+
+ Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Load),
+ DAG.getIntPtrConstant(0));
+
+ // Or the load with the bias.
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ MVT::v2f64, Load)),
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ MVT::v2f64, Bias)));
+ Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Or),
+ DAG.getIntPtrConstant(0));
+
+ // Subtract the bias.
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
+
+ // Handle final rounding.
+ MVT DestVT = Op.getValueType();
+
+ if (DestVT.bitsLT(MVT::f64)) {
+ return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0));
+ } else if (DestVT.bitsGT(MVT::f64)) {
+ return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+ }
+
+ // Handle final rounding.
+ return Sub;
+}
+
+SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ SDValue N0 = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+
+  // Now that UINT_TO_FP is not legal (it's marked custom), the DAG combiner
+  // won't optimize it to a SINT_TO_FP when the sign bit is known zero.
+  // Perform the optimization here instead.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
+
+ MVT SrcVT = N0.getValueType();
+ if (SrcVT == MVT::i64) {
+ // We only handle SSE2 f64 target here; caller can expand the rest.
+ if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
+ return SDValue();
+
+ return LowerUINT_TO_FP_i64(Op, DAG);
+ } else if (SrcVT == MVT::i32 && X86ScalarSSEf64) {
+ return LowerUINT_TO_FP_i32(Op, DAG);
+ }
+
+ assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!");
+
+ // Make a 64-bit buffer, and use it to build an FILD.
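+  // Zero-extending the value into an i64 slot makes it non-negative, so the
+  // signed FILD of that i64 produces the correct unsigned result: the low
+  // word below holds the value and the high word is stored as zero.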
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
+ SDValue WordOff = DAG.getConstant(4, getPointerTy());
+ SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
+ getPointerTy(), StackSlot, WordOff);
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot, NULL, 0);
+ SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
+ OffsetSlot, NULL, 0);
+ return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+}
+
+std::pair<SDValue,SDValue> X86TargetLowering::
+FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ MVT DstTy = Op.getValueType();
+
+ if (!IsSigned) {
+ assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
+ DstTy = MVT::i64;
+ }
+
+ assert(DstTy.getSimpleVT() <= MVT::i64 &&
+ DstTy.getSimpleVT() >= MVT::i16 &&
+ "Unknown FP_TO_SINT to lower!");
+
+ // These are really Legal.
+ if (DstTy == MVT::i32 &&
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDValue(), SDValue());
+ if (Subtarget->is64Bit() &&
+ DstTy == MVT::i64 &&
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDValue(), SDValue());
+
+ // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
+ // stack slot.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned MemSize = DstTy.getSizeInBits()/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+ unsigned Opc;
+ switch (DstTy.getSimpleVT()) {
+ default: assert(0 && "Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ }
+
+ SDValue Chain = DAG.getEntryNode();
+ SDValue Value = Op.getOperand(0);
+ if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
+ assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
+ Chain = DAG.getStore(Chain, dl, Value, StackSlot,
+ PseudoSourceValue::getFixedStack(SSFI), 0);
+ SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
+ SDValue Ops[] = {
+ Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
+ };
+ Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3);
+ Chain = Value.getValue(1);
+ SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ }
+
+ // Build the FP_TO_INT*_IN_MEM
+ SDValue Ops[] = { Chain, Value, StackSlot };
+ SDValue FIST = DAG.getNode(Opc, dl, MVT::Other, Ops, 3);
+
+ return std::make_pair(FIST, StackSlot);
+}
+
+SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
+ if (FIST.getNode() == 0) return Op;
+
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, NULL, 0);
+}
+
+SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) {
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ assert(FIST.getNode() && "Unexpected failure");
+
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, NULL, 0);
+}
+
+SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType();
+ MVT EltVT = VT;
+ if (VT.isVector())
+ EltVT = VT.getVectorElementType();
+ std::vector<Constant*> CV;
+ if (EltVT == MVT::f64) {
+ Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))));
+ CV.push_back(C);
+ CV.push_back(C);
+ } else {
+ Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31))));
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
+ return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
+}
+
+SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType();
+ MVT EltVT = VT;
+ unsigned EltNum = 1;
+ if (VT.isVector()) {
+ EltVT = VT.getVectorElementType();
+ EltNum = VT.getVectorNumElements();
+ }
+ std::vector<Constant*> CV;
+ if (EltVT == MVT::f64) {
+ Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63)));
+ CV.push_back(C);
+ CV.push_back(C);
+ } else {
+ Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31)));
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ CV.push_back(C);
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
+ if (VT.isVector()) {
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getNode(ISD::XOR, dl, MVT::v2i64,
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
+ Op.getOperand(0)),
+ DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, Mask)));
+ } else {
+ return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
+ }
+}
+
+SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ MVT VT = Op.getValueType();
+ MVT SrcVT = Op1.getValueType();
+
+ // If second operand is smaller, extend it first.
+ if (SrcVT.bitsLT(VT)) {
+ Op1 = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op1);
+ SrcVT = VT;
+ }
+ // And if it is bigger, shrink it first.
+ if (SrcVT.bitsGT(VT)) {
+ Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1));
+ SrcVT = VT;
+ }
+
+ // At this point the operands and the result should have the same
+ // type, and that won't be f80 since that is not custom lowered.
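+  // In effect the code below computes (a sketch of the intent):
+  //   copysign(Op0, Op1) = (Op0 & ~SignMask) | (Op1 & SignMask)
+  // using constant-pool vector masks with the FAND / FOR nodes.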
+
+ // First get the sign bit of second operand.
+ std::vector<Constant*> CV;
+ if (SrcVT == MVT::f64) {
+ CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+ } else {
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ }
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
+ SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
+
+ // Shift sign bit right or left if the two operands have different types.
+ if (SrcVT.bitsGT(VT)) {
+ // Op0 is MVT::f32, Op1 is MVT::f64.
+ SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
+ SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
+ DAG.getConstant(32, MVT::i32));
+ SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, SignBit);
+ SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Clear first operand sign bit.
+ CV.clear();
+ if (VT == MVT::f64) {
+ CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
+ } else {
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
+ }
+ C = ConstantVector::get(CV);
+ CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, 16);
+ SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
+}
+
+/// Emit nodes that will be selected as "test Op0,Op0", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
+ SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CF and OF aren't always set the way we want. Determine which
+ // of these we need.
+ bool NeedCF = false;
+ bool NeedOF = false;
+ switch (X86CC) {
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ NeedCF = true;
+ break;
+ case X86::COND_G: case X86::COND_GE:
+ case X86::COND_L: case X86::COND_LE:
+ case X86::COND_O: case X86::COND_NO:
+ NeedOF = true;
+ break;
+ default: break;
+ }
+
+ // See if we can use the EFLAGS value from the operand instead of
+ // doing a separate TEST. TEST always sets OF and CF to 0, so unless
+ // we prove that the arithmetic won't overflow, we can't use OF or CF.
+ if (Op.getResNo() == 0 && !NeedOF && !NeedCF) {
+ unsigned Opcode = 0;
+ unsigned NumOperands = 0;
+ switch (Op.getNode()->getOpcode()) {
+ case ISD::ADD:
+ // Due to an isel shortcoming, be conservative if this add is likely to
+ // be selected as part of a load-modify-store instruction. When the root
+ // node in a match is a store, isel doesn't know how to remap non-chain
+ // non-flag uses of other nodes in the match, such as the ADD in this
+ // case. This leads to the ADD being left around and reselected, with
+ // the result being two adds in the output.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() == ISD::STORE)
+ goto default_case;
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+ // An add of one will be selected as an INC.
+ if (C->getAPIntValue() == 1) {
+ Opcode = X86ISD::INC;
+ NumOperands = 1;
+ break;
+ }
+ // An add of negative one (subtract of one) will be selected as a DEC.
+ if (C->getAPIntValue().isAllOnesValue()) {
+ Opcode = X86ISD::DEC;
+ NumOperands = 1;
+ break;
+ }
+ }
+ // Otherwise use a regular EFLAGS-setting add.
+ Opcode = X86ISD::ADD;
+ NumOperands = 2;
+ break;
+ case ISD::SUB:
+ // Due to the ISEL shortcoming noted above, be conservative if this sub is
+ // likely to be selected as part of a load-modify-store instruction.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() == ISD::STORE)
+ goto default_case;
+ // Otherwise use a regular EFLAGS-setting sub.
+ Opcode = X86ISD::SUB;
+ NumOperands = 2;
+ break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ return SDValue(Op.getNode(), 1);
+ default:
+ default_case:
+ break;
+ }
+ if (Opcode != 0) {
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0; i != NumOperands; ++i)
+ Ops.push_back(Op.getOperand(i));
+ SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+ DAG.ReplaceAllUsesWith(Op, New);
+ return SDValue(New.getNode(), 1);
+ }
+ }
+
+ // Otherwise just emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+}
+
+/// Emit nodes that will be selected as "cmp Op0,Op1", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ SelectionDAG &DAG) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
+ if (C->getAPIntValue() == 0)
+ return EmitTest(Op0, X86CC, DAG);
+
+ DebugLoc dl = Op0.getDebugLoc();
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
+}
+
+SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Lower (X & (1 << N)) == 0 to BT(X, N).
+ // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+ // Lower ((X >>s N) & 1) != 0 to BT(X, N).
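+  // For example (illustrative), (X & (1 << N)) == 0 is selected roughly as
+  //   bt    %ecx, %eax        ; CF = bit N of X
+  //   setae %al               ; SETEQ -> COND_AE (CF == 0)
+  // while the != 0 forms use setb (COND_B) instead.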
+ if (Op0.getOpcode() == ISD::AND &&
+ Op0.hasOneUse() &&
+ Op1.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue LHS, RHS;
+ if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *Op010C =
+ dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
+ if (Op010C->getZExtValue() == 1) {
+ LHS = Op0.getOperand(0);
+ RHS = Op0.getOperand(1).getOperand(1);
+ }
+ } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *Op000C =
+ dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
+ if (Op000C->getZExtValue() == 1) {
+ LHS = Op0.getOperand(1);
+ RHS = Op0.getOperand(0).getOperand(1);
+ }
+ } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
+ ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
+ SDValue AndLHS = Op0.getOperand(0);
+ if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+ LHS = AndLHS.getOperand(0);
+ RHS = AndLHS.getOperand(1);
+ }
+ }
+
+ if (LHS.getNode()) {
+ // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i16 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+ if (LHS.getValueType() == MVT::i8)
+ LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (LHS.getValueType() != RHS.getValueType())
+ RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
+
+ SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
+ unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(Cond, MVT::i8), BT);
+ }
+ }
+
+ bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+ unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
+
+ SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond);
+}
+
+SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
+ SDValue Cond;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ MVT VT = Op.getValueType();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (isFP) {
+ unsigned SSECC = 8;
+ MVT VT0 = Op0.getValueType();
+ assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
+ unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
+ bool Swap = false;
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETOEQ:
+ case ISD::SETEQ: SSECC = 0; break;
+ case ISD::SETOGT:
+ case ISD::SETGT: Swap = true; // Fallthrough
+ case ISD::SETLT:
+ case ISD::SETOLT: SSECC = 1; break;
+ case ISD::SETOGE:
+ case ISD::SETGE: Swap = true; // Fallthrough
+ case ISD::SETLE:
+ case ISD::SETOLE: SSECC = 2; break;
+ case ISD::SETUO: SSECC = 3; break;
+ case ISD::SETUNE:
+ case ISD::SETNE: SSECC = 4; break;
+ case ISD::SETULE: Swap = true;
+ case ISD::SETUGE: SSECC = 5; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: SSECC = 6; break;
+ case ISD::SETO: SSECC = 7; break;
+ }
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // In the two special cases we can't handle, emit two comparisons.
+ if (SSECC == 8) {
+ if (SetCCOpcode == ISD::SETUEQ) {
+ SDValue UNORD, EQ;
+ UNORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
+ EQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
+ return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
+ }
+ else if (SetCCOpcode == ISD::SETONE) {
+ SDValue ORD, NEQ;
+ ORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(7, MVT::i8));
+ NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
+ }
+ assert(0 && "Illegal FP comparison");
+ }
+ // Handle all other FP comparisons here.
+ return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
+ }
+
+ // We are handling one of the integer comparisons here. Since SSE only has
+ // GT and EQ comparisons for integer, swapping operands and multiple
+ // operations may be required for some comparisons.
+ unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
+ bool Swap = false, Invert = false, FlipSigns = false;
+
+ switch (VT.getSimpleVT()) {
+ default: break;
+ case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
+ case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
+ case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
+ case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
+ }
+
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETNE: Invert = true;
+ case ISD::SETEQ: Opc = EQOpc; break;
+ case ISD::SETLT: Swap = true;
+ case ISD::SETGT: Opc = GTOpc; break;
+ case ISD::SETGE: Swap = true;
+ case ISD::SETLE: Opc = GTOpc; Invert = true; break;
+ case ISD::SETULT: Swap = true;
+ case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
+ case ISD::SETUGE: Swap = true;
+ case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
+ }
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // Since SSE has no unsigned integer comparisons, we need to flip the sign
+ // bits of the inputs before performing those operations.
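+  // The identity used here (a sketch): for each lane,
+  //   (a >u b)  ==  ((a ^ SignBit) >s (b ^ SignBit))
+  // so XORing both operands with the sign bit lets PCMPGT act as an unsigned
+  // compare.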
+ if (FlipSigns) {
+ MVT EltVT = VT.getVectorElementType();
+ SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
+ EltVT);
+ std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
+ SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
+ SignBits.size());
+ Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
+ Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
+ }
+
+ SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+
+ // If the logical-not of the result is required, perform that now.
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, VT);
+
+ return Result;
+}
+
+// isX86LogicalCmp - Return true if opcode is an X86 logical comparison.
+static bool isX86LogicalCmp(SDValue Op) {
+ unsigned Opc = Op.getNode()->getOpcode();
+ if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI)
+ return true;
+ if (Op.getResNo() == 1 &&
+ (Opc == X86ISD::ADD ||
+ Opc == X86ISD::SUB ||
+ Opc == X86ISD::SMUL ||
+ Opc == X86ISD::UMUL ||
+ Opc == X86ISD::INC ||
+ Opc == X86ISD::DEC))
+ return true;
+
+ return false;
+}
+
+SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
+ bool addTest = true;
+ SDValue Cond = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue CC;
+
+ if (Cond.getOpcode() == ISD::SETCC)
+ Cond = LowerSETCC(Cond, DAG);
+
+ // If condition flag is set by a X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
+ if (Cond.getOpcode() == X86ISD::SETCC) {
+ CC = Cond.getOperand(0);
+
+ SDValue Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ MVT VT = Op.getValueType();
+
+ bool IllegalFPCMov = false;
+ if (VT.isFloatingPoint() && !VT.isVector() &&
+ !isScalarFPTypeInSSEReg(VT)) // FPStack?
+ IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
+
+ if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
+ Opc == X86ISD::BT) { // FIXME
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ }
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+ SmallVector<SDValue, 4> Ops;
+ // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
+ // condition is true.
+ Ops.push_back(Op.getOperand(2));
+ Ops.push_back(Op.getOperand(1));
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ return DAG.getNode(X86ISD::CMOV, dl, VTs, &Ops[0], Ops.size());
+}
+
+// isAndOrOfSetCCs - Return true if node is an ISD::AND or ISD::OR of two
+// X86ISD::SETCC nodes, each of which has no other use apart from the
+// AND / OR.
+static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
+ Opc = Op.getOpcode();
+ if (Opc != ISD::OR && Opc != ISD::AND)
+ return false;
+ return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(0).hasOneUse() &&
+ Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(1).hasOneUse());
+}
+
+// isXor1OfSetCC - Return true if node is an ISD::XOR of a X86ISD::SETCC and
+// 1 and that the SETCC node has a single use.
+static bool isXor1OfSetCC(SDValue Op) {
+ if (Op.getOpcode() != ISD::XOR)
+ return false;
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (N1C && N1C->getAPIntValue() == 1) {
+ return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(0).hasOneUse();
+ }
+ return false;
+}
+
+SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
+ bool addTest = true;
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Dest = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue CC;
+
+ if (Cond.getOpcode() == ISD::SETCC)
+ Cond = LowerSETCC(Cond, DAG);
+#if 0
+ // FIXME: LowerXALUO doesn't handle these!!
+ else if (Cond.getOpcode() == X86ISD::ADD ||
+ Cond.getOpcode() == X86ISD::SUB ||
+ Cond.getOpcode() == X86ISD::SMUL ||
+ Cond.getOpcode() == X86ISD::UMUL)
+ Cond = LowerXALUO(Cond, DAG);
+#endif
+
+ // If condition flag is set by a X86ISD::CMP, then use it as the condition
+ // setting operand in place of the X86ISD::SETCC.
+ if (Cond.getOpcode() == X86ISD::SETCC) {
+ CC = Cond.getOperand(0);
+
+ SDValue Cmp = Cond.getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ // FIXME: WHY THE SPECIAL CASING OF LogicalCmp??
+ if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
+ Cond = Cmp;
+ addTest = false;
+ } else {
+ switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
+ default: break;
+ case X86::COND_O:
+ case X86::COND_B:
+ // These can only come from an arithmetic instruction with overflow,
+ // e.g. SADDO, UADDO.
+ Cond = Cond.getNode()->getOperand(1);
+ addTest = false;
+ break;
+ }
+ }
+ } else {
+ unsigned CondOpc;
+ if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
+ SDValue Cmp = Cond.getOperand(0).getOperand(1);
+ if (CondOpc == ISD::OR) {
+ // Also, recognize the pattern generated by an FCMP_UNE. We can emit
+ // two branches instead of an explicit OR instruction with a
+ // separate test.
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Cmp)) {
+ CC = Cond.getOperand(0).getOperand(0);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = Cond.getOperand(1).getOperand(0);
+ Cond = Cmp;
+ addTest = false;
+ }
+ } else { // ISD::AND
+ // Also, recognize the pattern generated by an FCMP_OEQ. We can emit
+ // two branches instead of an explicit AND instruction with a
+ // separate test. However, we only do this if this block doesn't
+ // have a fall-through edge, because this requires an explicit
+ // jmp when the condition is false.
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Cmp) &&
+ Op.getNode()->hasOneUse()) {
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ SDValue User = SDValue(*Op.getNode()->use_begin(), 0);
+ // Look for an unconditional branch following this conditional branch.
+ // We need this because we need to reverse the successors in order
+ // to implement FCMP_OEQ.
+ if (User.getOpcode() == ISD::BR) {
+ SDValue FalseBB = User.getOperand(1);
+ SDValue NewBR =
+ DAG.UpdateNodeOperands(User, User.getOperand(0), Dest);
+ assert(NewBR == User);
+ Dest = FalseBB;
+
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+ }
+ } else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
+      // Recognize the xorb (setcc), 1 pattern. The xor inverts the condition.
+      // It should be transformed during dag combine, except when the condition
+      // is set by an arithmetic-with-overflow node.
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ Cond = Cond.getOperand(0).getOperand(1);
+ addTest = false;
+ }
+ }
+
+ if (addTest) {
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ }
+ return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cond);
+}
+
+
+// Lower dynamic stack allocation to an _alloca call for Cygwin/Mingw targets.
+// A call to _alloca is needed to probe the stack when allocating more than 4k
+// bytes in one go. Touching the stack at 4K increments is necessary to ensure
+// that the guard pages used by the OS virtual memory manager are allocated in
+// the correct sequence.
+SDValue
+X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) {
+ assert(Subtarget->isTargetCygMing() &&
+ "This should be used only on Cygwin/Mingw targets");
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ // FIXME: Ensure alignment here
+
+ SDValue Flag;
+
+ MVT IntPtr = getPointerTy();
+ MVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+ Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
+ Flag = Chain.getValue(1);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain,
+ DAG.getTargetExternalSymbol("_alloca", IntPtr),
+ DAG.getRegister(X86::EAX, IntPtr),
+ DAG.getRegister(X86StackPtr, SPTy),
+ Flag };
+ Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops, 5);
+ Flag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true),
+ Flag);
+
+ Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
+
+ SDValue Ops1[2] = { Chain.getValue(0), Chain };
+ return DAG.getMergeValues(Ops1, 2, dl);
+}
+
+SDValue
+X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ const Value *DstSV,
+ uint64_t DstSVOff) {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+ // If not DWORD aligned or size is more than the threshold, call the library.
+ // The libc version is likely to be faster for these cases. It can use the
+ // address value and run time information about the CPU.
+ if ((Align & 3) != 0 ||
+ !ConstantSize ||
+ ConstantSize->getZExtValue() >
+ getSubtarget()->getMaxInlineSizeThreshold()) {
+ SDValue InFlag(0, 0);
+
+ // Check to see if there is a specialized entry-point for memory zeroing.
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+
+ if (const char *bzeroEntry = V &&
+ V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+ MVT IntPtr = getPointerTy();
+ const Type *IntPtrTy = TD->getIntPtrType();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ std::pair<SDValue,SDValue> CallResult =
+ LowerCallTo(Chain, Type::VoidTy, false, false, false, false,
+ CallingConv::C, false,
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
+ return CallResult.second;
+ }
+
+ // Otherwise have the target-independent code call memset.
+ return SDValue();
+ }
+
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ SDValue InFlag(0, 0);
+ MVT AVT;
+ SDValue Count;
+ ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
+ unsigned BytesLeft = 0;
+ bool TwoRepStos = false;
+ if (ValC) {
+ unsigned ValReg;
+ uint64_t Val = ValC->getZExtValue() & 255;
+
+ // If the value is a constant, we can potentially use wider stores.
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ AVT = MVT::i16;
+ ValReg = X86::AX;
+ Val = (Val << 8) | Val;
+ break;
+ case 0: // DWORD aligned
+ AVT = MVT::i32;
+ ValReg = X86::EAX;
+ Val = (Val << 8) | Val;
+ Val = (Val << 16) | Val;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
+ AVT = MVT::i64;
+ ValReg = X86::RAX;
+ Val = (Val << 32) | Val;
+ }
+ break;
+ default: // Byte aligned
+ AVT = MVT::i8;
+ ValReg = X86::AL;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ break;
+ }
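+ // At this point Val holds the fill byte replicated across the chosen element
+ // width, e.g. 0xAB becomes 0xABABABAB for a DWORD-wide rep stos.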
+
+ if (AVT.bitsGT(MVT::i8)) {
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+ BytesLeft = SizeVal % UBytes;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ AVT = MVT::i8;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
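+ // REP STOS implicitly takes its element count in {E,R}CX and its destination
+ // in {E,R}DI; the value to store is already in AL/AX/EAX/RAX from above.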
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getValueType(AVT));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, &Ops[0], Ops.size());
+
+ if (TwoRepStos) {
+ InFlag = Chain.getValue(1);
+ Count = Size;
+ MVT CVT = Count.getValueType();
+ SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
+ X86::ECX,
+ Left, InFlag);
+ InFlag = Chain.getValue(1);
+ Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Ops.clear();
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getValueType(MVT::i8));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, &Ops[0], Ops.size());
+ } else if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ MVT AddrVT = Dst.getValueType();
+ MVT SizeVT = Size.getValueType();
+
+ Chain = DAG.getMemset(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+ DAG.getConstant(Offset, AddrVT)),
+ Src,
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, DstSV, DstSVOff + Offset);
+ }
+
+ // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
+ return Chain;
+}
+
+SDValue
+X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff) {
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ // If not DWORD aligned, call the library.
+ if ((Align & 3) != 0)
+ return SDValue();
+
+ // DWORD aligned
+ MVT AVT = MVT::i32;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
+ AVT = MVT::i64;
+
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ unsigned CountVal = SizeVal / UBytes;
+ SDValue Count = DAG.getIntPtrConstant(CountVal);
+ unsigned BytesLeft = SizeVal % UBytes;
+
+ SDValue InFlag(0, 0);
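+ // REP MOVS implicitly takes its element count in {E,R}CX, its destination in
+ // {E,R}DI, and its source in {E,R}SI.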
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
+ X86::ESI,
+ Src, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getValueType(AVT));
+ Ops.push_back(InFlag);
+ SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, &Ops[0], Ops.size());
+
+ SmallVector<SDValue, 4> Results;
+ Results.push_back(RepMovs);
+ if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ MVT DstVT = Dst.getValueType();
+ MVT SrcVT = Src.getValueType();
+ MVT SizeVT = Size.getValueType();
+ Results.push_back(DAG.getMemcpy(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, DstVT, Dst,
+ DAG.getConstant(Offset, DstVT)),
+ DAG.getNode(ISD::ADD, dl, SrcVT, Src,
+ DAG.getConstant(Offset, SrcVT)),
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, AlwaysInline,
+ DstSV, DstSVOff + Offset,
+ SrcSV, SrcSVOff + Offset));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Results[0], Results.size());
+}
+
+SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (!Subtarget->is64Bit()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
+ }
+
+ // __va_list_tag:
+ // gp_offset (0 - 6 * 8)
+ // fp_offset (48 - 48 + 8 * 16)
+ // overflow_arg_area (points to parameters passed in memory).
+ // reg_save_area
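+ // The four fields live at byte offsets 0, 4, 8 and 16 respectively, which is
+ // why the stores below advance FIN by 4, 4 and then 8 bytes.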
+ SmallVector<SDValue, 8> MemOps;
+ SDValue FIN = Op.getOperand(1);
+ // Store gp_offset
+ SDValue Store = DAG.getStore(Op.getOperand(0), dl,
+ DAG.getConstant(VarArgsGPOffset, MVT::i32),
+ FIN, SV, 0);
+ MemOps.push_back(Store);
+
+ // Store fp_offset
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(4));
+ Store = DAG.getStore(Op.getOperand(0), dl,
+ DAG.getConstant(VarArgsFPOffset, MVT::i32),
+ FIN, SV, 0);
+ MemOps.push_back(Store);
+
+ // Store ptr to overflow_arg_area
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(4));
+ SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0);
+ MemOps.push_back(Store);
+
+ // Store ptr to reg_save_area.
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FIN, DAG.getIntPtrConstant(8));
+ SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+ Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0);
+ MemOps.push_back(Store);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+}
+
+SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) {
+ // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+ assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+ SDValue Chain = Op.getOperand(0);
+ SDValue SrcPtr = Op.getOperand(1);
+ SDValue SrcSV = Op.getOperand(2);
+
+ assert(0 && "VAArgInst is not yet implemented for x86-64!");
+ abort();
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) {
+ // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+ assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
+ SDValue Chain = Op.getOperand(0);
+ SDValue DstPtr = Op.getOperand(1);
+ SDValue SrcPtr = Op.getOperand(2);
+ const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr,
+ DAG.getIntPtrConstant(24), 8, false,
+ DstSV, 0, SrcSV, 0);
+}
+
+SDValue
+X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ // Comparison intrinsics.
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ case Intrinsic::x86_sse2_comilt_sd:
+ case Intrinsic::x86_sse2_comile_sd:
+ case Intrinsic::x86_sse2_comigt_sd:
+ case Intrinsic::x86_sse2_comige_sd:
+ case Intrinsic::x86_sse2_comineq_sd:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ case Intrinsic::x86_sse2_ucomineq_sd: {
+ unsigned Opc = 0;
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse2_comilt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse2_comile_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse2_comigt_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse2_comige_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse2_comineq_sd:
+ Opc = X86ISD::COMI;
+ CC = ISD::SETNE;
+ break;
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETEQ;
+ break;
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLT;
+ break;
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETLE;
+ break;
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGT;
+ break;
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETGE;
+ break;
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_ucomineq_sd:
+ Opc = X86ISD::UCOMI;
+ CC = ISD::SETNE;
+ break;
+ }
+
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
+ SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
+
+ // Fix vector shift instructions where the last operand is a non-immediate
+ // i32 value.
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = Op.getOperand(2);
+ if (isa<ConstantSDNode>(ShAmt))
+ return SDValue();
+
+ unsigned NewIntNo = 0;
+ MVT ShAmtVT = MVT::v4i32;
+ switch (IntNo) {
+ case Intrinsic::x86_sse2_pslli_w:
+ NewIntNo = Intrinsic::x86_sse2_psll_w;
+ break;
+ case Intrinsic::x86_sse2_pslli_d:
+ NewIntNo = Intrinsic::x86_sse2_psll_d;
+ break;
+ case Intrinsic::x86_sse2_pslli_q:
+ NewIntNo = Intrinsic::x86_sse2_psll_q;
+ break;
+ case Intrinsic::x86_sse2_psrli_w:
+ NewIntNo = Intrinsic::x86_sse2_psrl_w;
+ break;
+ case Intrinsic::x86_sse2_psrli_d:
+ NewIntNo = Intrinsic::x86_sse2_psrl_d;
+ break;
+ case Intrinsic::x86_sse2_psrli_q:
+ NewIntNo = Intrinsic::x86_sse2_psrl_q;
+ break;
+ case Intrinsic::x86_sse2_psrai_w:
+ NewIntNo = Intrinsic::x86_sse2_psra_w;
+ break;
+ case Intrinsic::x86_sse2_psrai_d:
+ NewIntNo = Intrinsic::x86_sse2_psra_d;
+ break;
+ default: {
+ ShAmtVT = MVT::v2i32;
+ switch (IntNo) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntNo = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntNo = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntNo = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntNo = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntNo = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntNo = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntNo = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntNo = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: abort(); // Can't reach here.
+ }
+ break;
+ }
+ }
+ MVT VT = Op.getValueType();
+ ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShAmtVT, ShAmt));
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(NewIntNo, MVT::i32),
+ Op.getOperand(1), ShAmt);
+ }
+ }
+}
+
+SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(TD->getPointerSize(),
+ Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ NULL, 0);
+ }
+
+ // Just load the return address.
+ SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, NULL, 0);
+}
+
+SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
+ return FrameAddr;
+}
+
+SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
+ SelectionDAG &DAG) {
+ return DAG.getIntPtrConstant(2*TD->getPointerSize());
+}
+
+SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
+{
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue Frame = DAG.getRegister(Subtarget->is64Bit() ? X86::RBP : X86::EBP,
+ getPointerTy());
+ unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
+
+ SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame,
+ DAG.getIntPtrConstant(-TD->getPointerSize()));
+ StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
+ MF.getRegInfo().addLiveOut(StoreAddrReg);
+
+ return DAG.getNode(X86ISD::EH_RETURN, dl,
+ MVT::Other,
+ Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
+}
+
+SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) {
+ SDValue Root = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ DebugLoc dl = Op.getDebugLoc();
+
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ const X86InstrInfo *TII =
+ ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
+
+ if (Subtarget->is64Bit()) {
+ SDValue OutChains[6];
+
+ // Large code-model.
+
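+ // The emitted trampoline is: movabsq $FPtr, %r11; movabsq $Nest, %r10;
+ // jmpq *%r11. Each movabs is a 2-byte REX+opcode pair followed by an 8-byte
+ // immediate, and the indirect jump is REX+opcode+ModRM, giving the store
+ // offsets 0, 2, 10, 12, 20 and 22 used below.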
+ const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r);
+ const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri);
+
+ const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10);
+ const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11);
+
+ const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
+
+ // Load the pointer to the nested function into R11.
+ unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
+ SDValue Addr = Trmp;
+ OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, TrmpAddr, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(2, MVT::i64));
+ OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, false, 2);
+
+ // Load the 'nest' parameter value into R10.
+ // R10 is specified in X86CallingConv.td
+ OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(10, MVT::i64));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, TrmpAddr, 10);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(12, MVT::i64));
+ OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, false, 2);
+
+ // Jump to the nested function.
+ OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(20, MVT::i64));
+ OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
+ Addr, TrmpAddr, 20);
+
+ unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(22, MVT::i64));
+ OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
+ TrmpAddr, 22);
+
+ SDValue Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
+ return DAG.getMergeValues(Ops, 2, dl);
+ } else {
+ const Function *Func =
+ cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
+ unsigned CC = Func->getCallingConv();
+ unsigned NestReg;
+
+ switch (CC) {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::X86_StdCall: {
+ // Pass 'nest' parameter in ECX.
+ // Must be kept in sync with X86CallingConv.td
+ NestReg = X86::ECX;
+
+ // Check that ECX wasn't needed by an 'inreg' parameter.
+ const FunctionType *FTy = Func->getFunctionType();
+ const AttrListPtr &Attrs = Func->getAttributes();
+
+ if (!Attrs.isEmpty() && !Func->isVarArg()) {
+ unsigned InRegCount = 0;
+ unsigned Idx = 1;
+
+ for (FunctionType::param_iterator I = FTy->param_begin(),
+ E = FTy->param_end(); I != E; ++I, ++Idx)
+ if (Attrs.paramHasAttr(Idx, Attribute::InReg))
+ // FIXME: should only count parameters that are lowered to integers.
+ InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
+
+ if (InRegCount > 2) {
+ cerr << "Nest register in use - reduce number of inreg parameters!\n";
+ abort();
+ }
+ }
+ break;
+ }
+ case CallingConv::X86_FastCall:
+ case CallingConv::Fast:
+ // Pass 'nest' parameter in EAX.
+ // Must be kept in sync with X86CallingConv.td
+ NestReg = X86::EAX;
+ break;
+ }
+
+ SDValue OutChains[4];
+ SDValue Addr, Disp;
+
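+ // The 32-bit trampoline is a 5-byte 'movl $Nest, %NestReg' followed by a
+ // 5-byte 'jmp rel32'; the rel32 displacement is measured from the end of the
+ // jmp at Trmp+10, so Disp = FPtr - (Trmp + 10).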
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(10, MVT::i32));
+ Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);
+
+ const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
+ const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
+ OutChains[0] = DAG.getStore(Root, dl,
+ DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
+ Trmp, TrmpAddr, 0);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(1, MVT::i32));
+ OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, false, 1);
+
+ const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(5, MVT::i32));
+ OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
+ TrmpAddr, 5, false, 1);
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
+ DAG.getConstant(6, MVT::i32));
+ OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, false, 1);
+
+ SDValue Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) };
+ return DAG.getMergeValues(Ops, 2, dl);
+ }
+}
+
+SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
+ /*
+ The rounding mode is in bits 11:10 of the x87 FP control word (the value
+ stored by FNSTCW below), and has the following settings:
+ 00 Round to nearest
+ 01 Round to -inf
+ 10 Round to +inf
+ 11 Round to 0
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ (((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3)
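+
+ For example, RC bits 01 (round toward -inf) give (0 | 2) + 1 = 3, and RC
+ bits 11 (round toward zero) give ((1 | 2) + 1) & 3 = 0, matching the
+ FLT_ROUNDS values above.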
+ */
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ MVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Save FP Control Word to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+ SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other,
+ DAG.getEntryNode(), StackSlot);
+
+ // Load FP Control Word from stack slot
+ SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0);
+
+ // Transform as necessary
+ SDValue CWD1 =
+ DAG.getNode(ISD::SRL, dl, MVT::i16,
+ DAG.getNode(ISD::AND, dl, MVT::i16,
+ CWD, DAG.getConstant(0x800, MVT::i16)),
+ DAG.getConstant(11, MVT::i8));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i16,
+ DAG.getNode(ISD::AND, dl, MVT::i16,
+ CWD, DAG.getConstant(0x400, MVT::i16)),
+ DAG.getConstant(9, MVT::i8));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::ADD, dl, MVT::i16,
+ DAG.getNode(ISD::OR, dl, MVT::i16, CWD1, CWD2),
+ DAG.getConstant(1, MVT::i16)),
+ DAG.getConstant(3, MVT::i16));
+
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+}
+
+SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ MVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+ // Zero extend to i32 since there is not an i8 bsr.
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+ }
+
+ // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
+
+ // If src is zero (i.e. bsr sets ZF), select 2*NumBits-1 so the xor below
+ // yields NumBits.
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(Op);
+ Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT));
+ Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
+ Ops.push_back(Op.getValue(1));
+ Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, &Ops[0], 4);
+
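+ // BSR returns the index of the highest set bit, so CTLZ = (NumBits-1) - index;
+ // since NumBits is a power of two, that subtraction is the same as an xor
+ // with NumBits-1.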
+ // Finally xor with NumBits-1.
+ Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
+SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ MVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+ }
+
+ // Issue a bsf (scan bits forward) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op);
+
+ // If src is zero (i.e. bsf sets ZF), returns NumBits.
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(Op);
+ Ops.push_back(DAG.getConstant(NumBits, OpVT));
+ Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
+ Ops.push_back(Op.getValue(1));
+ Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, &Ops[0], 4);
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
+SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
+ DebugLoc dl = Op.getDebugLoc();
+
+ // ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
+ // ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
+ // ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
+ // ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
+ // ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
+ //
+ // AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
+ // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
+ // return AloBlo + AloBhi + AhiBlo;
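+ //
+ // SSE2 has no 64-bit element multiply, so the v2i64 product is assembled
+ // from 32x32->64 pmuludq partial products as outlined above.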
+
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
+
+ SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ A, DAG.getConstant(32, MVT::i32));
+ SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ B, DAG.getConstant(32, MVT::i32));
+ SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, B);
+ SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, Bhi);
+ SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ Ahi, B);
+ AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AloBhi, DAG.getConstant(32, MVT::i32));
+ AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AhiBlo, DAG.getConstant(32, MVT::i32));
+ SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
+ Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
+ return Res;
+}
+
+
+SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
+ // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
+ // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
+ // looks for this combo and may remove the "setcc" instruction if the "setcc"
+ // has only one use.
+ SDNode *N = Op.getNode();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ unsigned BaseOp = 0;
+ unsigned Cond = 0;
+ DebugLoc dl = Op.getDebugLoc();
+
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Unknown ovf instruction!");
+ case ISD::SADDO:
+ // An add of one will be selected as an INC. Note that INC doesn't
+ // set CF, so we can't do this for UADDO.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->getAPIntValue() == 1) {
+ BaseOp = X86ISD::INC;
+ Cond = X86::COND_O;
+ break;
+ }
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SSUBO:
+ // A subtract of one will be selected as a DEC. Note that DEC doesn't
+ // set CF, so we can't do this for USUBO.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->getAPIntValue() == 1) {
+ BaseOp = X86ISD::DEC;
+ Cond = X86::COND_O;
+ break;
+ }
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_O;
+ break;
+ case ISD::USUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_B;
+ break;
+ case ISD::SMULO:
+ BaseOp = X86ISD::SMUL;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UMULO:
+ BaseOp = X86ISD::UMUL;
+ Cond = X86::COND_B;
+ break;
+ }
+
+ // Also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
+ SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1),
+ DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
+ return Sum;
+}
+
+SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) {
+ MVT T = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Reg = 0;
+ unsigned size = 0;
+ switch(T.getSimpleVT()) {
+ default:
+ assert(false && "Invalid value type!");
+ case MVT::i8: Reg = X86::AL; size = 1; break;
+ case MVT::i16: Reg = X86::AX; size = 2; break;
+ case MVT::i32: Reg = X86::EAX; size = 4; break;
+ case MVT::i64:
+ assert(Subtarget->is64Bit() && "Node not type legal!");
+ Reg = X86::RAX; size = 8;
+ break;
+ }
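+ // CMPXCHG compares AL/AX/EAX/RAX with the memory operand and, on failure,
+ // loads the current memory value back into that register, so the comparand
+ // is copied in and the result copied out through the same register.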
+ SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), dl, Reg,
+ Op.getOperand(2), SDValue());
+ SDValue Ops[] = { cpIn.getValue(0),
+ Op.getOperand(1),
+ Op.getOperand(3),
+ DAG.getTargetConstant(size, MVT::i8),
+ cpIn.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, dl, Tys, Ops, 5);
+ SDValue cpOut =
+ DAG.getCopyFromReg(Result.getValue(0), dl, Reg, T, Result.getValue(1));
+ return cpOut;
+}
+
+SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
+ SelectionDAG &DAG) {
+ assert(Subtarget->is64Bit() && "Result not type legalized?");
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue TheChain = Op.getOperand(0);
+ DebugLoc dl = Op.getDebugLoc();
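+ // RDTSC returns the timestamp counter in EDX:EAX; combine the two halves
+ // into a single i64 as (RDX << 32) | RAX.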
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+ SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1));
+ SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64,
+ rax.getValue(2));
+ SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx,
+ DAG.getConstant(32, MVT::i8));
+ SDValue Ops[] = {
+ DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
+ rdx.getValue(1)
+ };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ MVT T = Node->getValueType(0);
+ SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
+ DAG.getConstant(0, T), Node->getOperand(2));
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), negOp,
+ cast<AtomicSDNode>(Node)->getSrcValue(),
+ cast<AtomicSDNode>(Node)->getAlignment());
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
+ case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::SHL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: return LowerShift(Op, DAG);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+ case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::FABS: return LowerFABS(Op, DAG);
+ case ISD::FNEG: return LowerFNEG(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VSETCC: return LowerVSETCC(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
+ case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::CTLZ: return LowerCTLZ(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op, DAG);
+ case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO: return LowerXALUO(Op, DAG);
+ case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
+ }
+}
+
+void X86TargetLowering::
+ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG, unsigned NewOp) {
+ MVT T = Node->getValueType(0);
+ DebugLoc dl = Node->getDebugLoc();
+ assert (T == MVT::i64 && "Only know how to expand i64 atomics");
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(0));
+ SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(1));
+ // This is a generalized SDNode, not an AtomicSDNode, so it doesn't
+ // have a MemOperand. Pass the info through as a normal operand.
+ SDValue LSI = DAG.getMemOperand(cast<MemSDNode>(Node)->getMemOperand());
+ SDValue Ops[] = { Chain, In1, In2L, In2H, LSI };
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue Result = DAG.getNode(NewOp, dl, Tys, Ops, 5);
+ SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(Result.getValue(2));
+}
+
+/// ReplaceNodeResults - Replace a node with an illegal result type
+/// with a new node built out of custom code.
+void X86TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
+ DebugLoc dl = N->getDebugLoc();
+ switch (N->getOpcode()) {
+ default:
+ assert(false && "Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::FP_TO_SINT: {
+ std::pair<SDValue,SDValue> Vals =
+ FP_TO_INTHelper(SDValue(N, 0), DAG, true);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ if (FIST.getNode() != 0) {
+ MVT VT = N->getValueType(0);
+ // Return a load from the stack slot.
+ Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0));
+ }
+ return;
+ }
+ case ISD::READCYCLECOUNTER: {
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue TheChain = N->getOperand(0);
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
+ SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
+ rd.getValue(1));
+ SDValue edx = DAG.getCopyFromReg(eax.getValue(1), dl, X86::EDX, MVT::i32,
+ eax.getValue(2));
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ SDValue Ops[] = { eax, edx };
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2));
+ Results.push_back(edx.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_CMP_SWAP: {
+ MVT T = N->getValueType(0);
+ assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
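+ // CMPXCHG8B expects the comparand in EDX:EAX and the replacement value in
+ // ECX:EBX, and returns the original memory value in EDX:EAX.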
+ SDValue cpInL, cpInH;
+ cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
+ DAG.getConstant(0, MVT::i32));
+ cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
+ DAG.getConstant(1, MVT::i32));
+ cpInL = DAG.getCopyToReg(N->getOperand(0), dl, X86::EAX, cpInL, SDValue());
+ cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, X86::EDX, cpInH,
+ cpInL.getValue(1));
+ SDValue swapInL, swapInH;
+ swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
+ DAG.getConstant(0, MVT::i32));
+ swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
+ DAG.getConstant(1, MVT::i32));
+ swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, X86::EBX, swapInL,
+ cpInH.getValue(1));
+ swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, X86::ECX, swapInH,
+ swapInL.getValue(1));
+ SDValue Ops[] = { swapInH.getValue(0),
+ N->getOperand(1),
+ swapInH.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, 3);
+ SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
+ MVT::i32, Result.getValue(1));
+ SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
+ MVT::i32, cpOutL.getValue(2));
+ SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(cpOutH.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_LOAD_ADD:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_AND:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_NAND:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_OR:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_SUB:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_XOR:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
+ return;
+ case ISD::ATOMIC_SWAP:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
+ return;
+ }
+}
+
+const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return NULL;
+ case X86ISD::BSF: return "X86ISD::BSF";
+ case X86ISD::BSR: return "X86ISD::BSR";
+ case X86ISD::SHLD: return "X86ISD::SHLD";
+ case X86ISD::SHRD: return "X86ISD::SHRD";
+ case X86ISD::FAND: return "X86ISD::FAND";
+ case X86ISD::FOR: return "X86ISD::FOR";
+ case X86ISD::FXOR: return "X86ISD::FXOR";
+ case X86ISD::FSRL: return "X86ISD::FSRL";
+ case X86ISD::FILD: return "X86ISD::FILD";
+ case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
+ case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
+ case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
+ case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
+ case X86ISD::FLD: return "X86ISD::FLD";
+ case X86ISD::FST: return "X86ISD::FST";
+ case X86ISD::CALL: return "X86ISD::CALL";
+ case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
+ case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
+ case X86ISD::BT: return "X86ISD::BT";
+ case X86ISD::CMP: return "X86ISD::CMP";
+ case X86ISD::COMI: return "X86ISD::COMI";
+ case X86ISD::UCOMI: return "X86ISD::UCOMI";
+ case X86ISD::SETCC: return "X86ISD::SETCC";
+ case X86ISD::CMOV: return "X86ISD::CMOV";
+ case X86ISD::BRCOND: return "X86ISD::BRCOND";
+ case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
+ case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
+ case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
+ case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
+ case X86ISD::Wrapper: return "X86ISD::Wrapper";
+ case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
+ case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
+ case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
+ case X86ISD::PINSRB: return "X86ISD::PINSRB";
+ case X86ISD::PINSRW: return "X86ISD::PINSRW";
+ case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
+ case X86ISD::FMAX: return "X86ISD::FMAX";
+ case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
+ case X86ISD::FRCP: return "X86ISD::FRCP";
+ case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
+ case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress";
+ case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
+ case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
+ case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
+ case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
+ case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
+ case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
+ case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
+ case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
+ case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
+ case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
+ case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
+ case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+ case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VSHL: return "X86ISD::VSHL";
+ case X86ISD::VSRL: return "X86ISD::VSRL";
+ case X86ISD::CMPPD: return "X86ISD::CMPPD";
+ case X86ISD::CMPPS: return "X86ISD::CMPPS";
+ case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB";
+ case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW";
+ case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD";
+ case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ";
+ case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB";
+ case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW";
+ case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD";
+ case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
+ case X86ISD::ADD: return "X86ISD::ADD";
+ case X86ISD::SUB: return "X86ISD::SUB";
+ case X86ISD::SMUL: return "X86ISD::SMUL";
+ case X86ISD::UMUL: return "X86ISD::UMUL";
+ case X86ISD::INC: return "X86ISD::INC";
+ case X86ISD::DEC: return "X86ISD::DEC";
+ case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
+ }
+}
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ // X86 supports extremely general addressing modes.
+
+ // X86 allows a sign-extended 32-bit immediate field as a displacement.
+ if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1)
+ return false;
+
+ if (AM.BaseGV) {
+ // We can only fold this if we don't need an extra load.
+ if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
+ return false;
+ // If BaseGV requires a register, we cannot also have a BaseReg.
+ if (Subtarget->GVRequiresRegister(AM.BaseGV, getTargetMachine(), false) &&
+ AM.HasBaseReg)
+ return false;
+
+ // X86-64 only supports addr of globals in small code model.
+ if (Subtarget->is64Bit()) {
+ if (getTargetMachine().getCodeModel() != CodeModel::Small)
+ return false;
+ // If lower 4G is not available, then we must use rip-relative addressing.
+ if (AM.BaseOffs || AM.Scale > 1)
+ return false;
+ }
+ }
+
+ switch (AM.Scale) {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ // These scales always work.
+ break;
+ case 3:
+ case 5:
+ case 9:
+ // These scales are formed with basereg+scalereg. Only accept if there is
+ // no basereg yet.
+ if (AM.HasBaseReg)
+ return false;
+ break;
+ default: // Other stuff never works.
+ return false;
+ }
+
+ return true;
+}
+
+
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+ if (!Ty1->isInteger() || !Ty2->isInteger())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return Subtarget->is64Bit() || NumBits1 < 64;
+}
+
+bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return Subtarget->is64Bit() || NumBits1 < 64;
+}
+
+bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
+ // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+ return Ty1 == Type::Int32Ty && Ty2 == Type::Int64Ty && Subtarget->is64Bit();
+}
+
+bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const {
+ // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
+ return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
+}
+
+bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const {
+ // i16 instructions are longer (0x66 prefix) and potentially slower.
+ return !(VT1 == MVT::i32 && VT2 == MVT::i16);
+}
+
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ MVT VT) const {
+ // Only do shuffles on 128-bit vector types for now.
+ if (VT.getSizeInBits() == 64)
+ return false;
+
+ // FIXME: pshufb, blends, palignr, shifts.
+ return (VT.getVectorNumElements() == 2 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isMOVLMask(M, VT) ||
+ isSHUFPMask(M, VT) ||
+ isPSHUFDMask(M, VT) ||
+ isPSHUFHWMask(M, VT) ||
+ isPSHUFLWMask(M, VT) ||
+ isUNPCKLMask(M, VT) ||
+ isUNPCKHMask(M, VT) ||
+ isUNPCKL_v_undef_Mask(M, VT) ||
+ isUNPCKH_v_undef_Mask(M, VT));
+}
+
+bool
+X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+ MVT VT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+ // FIXME: This collection of masks seems suspect.
+ if (NumElts == 2)
+ return true;
+ if (NumElts == 4 && VT.getSizeInBits() == 128) {
+ return (isMOVLMask(Mask, VT) ||
+ isCommutedMOVLMask(Mask, VT, true) ||
+ isSHUFPMask(Mask, VT) ||
+ isCommutedSHUFPMask(Mask, VT));
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpc,
+ unsigned immOpc,
+ unsigned LoadOpc,
+ unsigned CXchgOpc,
+ unsigned copyOpc,
+ unsigned notOpc,
+ unsigned EAXreg,
+ TargetRegisterClass *RC,
+ bool invSrc) const {
+ // For the atomic bitwise operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [bitinstr.addr]
+ // op t2 = t1, [bitinstr.val]
+ // mov EAX = t1
+ // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
+ // bz newMBB
+ // fallthrough -->nextMBB
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Move all successors of thisMBB to nextMBB
+ nextMBB->transferSuccessors(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ // Insert instructions into newMBB based on incoming instruction
+ assert(bInstr->getNumOperands() < X86AddrNumOperands + 4 &&
+ "unexpected number of operands");
+ DebugLoc dl = bInstr->getDebugLoc();
+ MachineOperand& destOper = bInstr->getOperand(0);
+ MachineOperand* argOpers[2 + X86AddrNumOperands];
+ int numArgs = bInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &bInstr->getOperand(i+1);
+
+ // An x86 memory reference has 5 operands: base, scale, index, displacement,
+ // and segment
+ int lastAddrIndx = X86AddrNumOperands - 1; // [0,4]
+ int valArgIndx = lastAddrIndx + 1;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ unsigned tt = F->getRegInfo().createVirtualRegister(RC);
+ if (invSrc) {
+ MIB = BuildMI(newMBB, dl, TII->get(notOpc), tt).addReg(t1);
+ }
+ else
+ tt = t1;
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
+ MIB.addReg(tt);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), EAXreg);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t2);
+ assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
+ (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());
+
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg());
+ MIB.addReg(EAXreg);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB);
+
+ F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+// private utility function: 64 bit atomics on 32 bit host.
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpcL,
+ unsigned regOpcH,
+ unsigned immOpcL,
+ unsigned immOpcH,
+ bool invSrc) const {
+ // For the atomic bitwise operator, we generate
+ // thisMBB (instructions are in pairs, except cmpxchg8b)
+ // ld t1,t2 = [bitinstr.addr]
+ // newMBB:
+ // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
+ // op t5, t6 <- out1, out2, [bitinstr.val]
+ // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val])
+ // mov ECX, EBX <- t5, t6
+ // mov EAX, EDX <- t1, t2
+ // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
+ // mov t3, t4 <- EAX, EDX
+ // bz newMBB
+ // result in out1, out2
+ // fallthrough -->nextMBB
+
+ const TargetRegisterClass *RC = X86::GR32RegisterClass;
+ const unsigned LoadOpc = X86::MOV32rm;
+ const unsigned copyOpc = X86::MOV32rr;
+ const unsigned NotOpc = X86::NOT32r;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Move all successors of thisMBB to nextMBB
+ nextMBB->transferSuccessors(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ DebugLoc dl = bInstr->getDebugLoc();
+ // Insert instructions into newMBB based on incoming instruction
+ // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
+ assert(bInstr->getNumOperands() < X86AddrNumOperands + 14 &&
+ "unexpected number of operands");
+ MachineOperand& dest1Oper = bInstr->getOperand(0);
+ MachineOperand& dest2Oper = bInstr->getOperand(1);
+ MachineOperand* argOpers[2 + X86AddrNumOperands];
+ for (int i=0; i < 2 + X86AddrNumOperands; ++i)
+ argOpers[i] = &bInstr->getOperand(i+2);
+
+ // An x86 memory reference has 5 operands: base, scale, index, displacement,
+ // and segment
+ int lastAddrIndx = X86AddrNumOperands - 1; // [0,4]
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
+ // Add 4 to the displacement so this second load reads the high half of the i64.
+ for (int i=0; i <= lastAddrIndx-2; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MachineOperand newOp3 = *(argOpers[3]);
+ if (newOp3.isImm())
+ newOp3.setImm(newOp3.getImm()+4);
+ else
+ newOp3.setOffset(newOp3.getOffset()+4);
+ (*MIB).addOperand(newOp3);
+ (*MIB).addOperand(*argOpers[lastAddrIndx]);
+
+ // t3/4 are defined later, at the bottom of the loop
+ unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
+ BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
+ BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
+ .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
+
+ unsigned tt1 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned tt2 = F->getRegInfo().createVirtualRegister(RC);
+ if (invSrc) {
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt1).addReg(t1);
+ MIB = BuildMI(newMBB, dl, TII->get(NotOpc), tt2).addReg(t2);
+ } else {
+ tt1 = t1;
+ tt2 = t2;
+ }
+
+ int valArgIndx = lastAddrIndx + 1;
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+ unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
+ if (argOpers[valArgIndx]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
+ if (regOpcL != X86::MOV32rr)
+ MIB.addReg(tt1);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+ assert(argOpers[valArgIndx + 1]->isReg() ==
+ argOpers[valArgIndx]->isReg());
+ assert(argOpers[valArgIndx + 1]->isImm() ==
+ argOpers[valArgIndx]->isImm());
+ if (argOpers[valArgIndx + 1]->isReg())
+ MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
+ else
+ MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
+ if (regOpcH != X86::MOV32rr)
+ MIB.addReg(tt2);
+ (*MIB).addOperand(*argOpers[valArgIndx + 1]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX);
+ MIB.addReg(t1);
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EDX);
+ MIB.addReg(t2);
+
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EBX);
+ MIB.addReg(t5);
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::ECX);
+ MIB.addReg(t6);
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
+ (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());
+
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3);
+ MIB.addReg(X86::EAX);
+ MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t4);
+ MIB.addReg(X86::EDX);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB);
+
+ F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
+ MachineBasicBlock *MBB,
+ unsigned cmovOpc) const {
+ // For the atomic min/max operator, we generate
+ // thisMBB:
+ // newMBB:
+ // ld t1 = [min/max.addr]
+ // mov t2 = [min/max.val]
+ // cmp t1, t2
+ // cmov[cond] t2 = t1
+ // mov EAX = t1
+ // lcs dest = [bitinstr.addr], t2 [EAX is implicit]
+ // bz newMBB
+ // fallthrough -->nextMBB
+ //
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ /// First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+  // Move all successors of thisMBB to nextMBB
+ nextMBB->transferSuccessors(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+  // newMBB loops back to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ DebugLoc dl = mInstr->getDebugLoc();
+ // Insert instructions into newMBB based on incoming instruction
+ assert(mInstr->getNumOperands() < X86AddrNumOperands + 4 &&
+ "unexpected number of operands");
+ MachineOperand& destOper = mInstr->getOperand(0);
+ MachineOperand* argOpers[2 + X86AddrNumOperands];
+ int numArgs = mInstr->getNumOperands() - 1;
+ for (int i=0; i < numArgs; ++i)
+ argOpers[i] = &mInstr->getOperand(i+1);
+
+  // The leading operands form the x86 memory reference (base, scale, index,
+  // displacement, ...); the value operand follows the address operands.
+  int lastAddrIndx = X86AddrNumOperands - 1; // index of last address operand
+ int valArgIndx = lastAddrIndx + 1;
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ // We only support register and immediate values
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
+ "invalid operand");
+
+ unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+  if (argOpers[valArgIndx]->isReg())
+    MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
+  else
+    // The value is an immediate, so materialize it with a move-immediate.
+    MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32ri), t2);
+ (*MIB).addOperand(*argOpers[valArgIndx]);
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), X86::EAX);
+ MIB.addReg(t1);
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
+ MIB.addReg(t1);
+ MIB.addReg(t2);
+
+  // Generate the cmov that selects the min/max value
+ unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+ MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3);
+ MIB.addReg(t2);
+ MIB.addReg(t1);
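+  // The cmov's first (tied) source is t2 and the second is t1, so t3 starts
+  // as the operand value and is replaced with the loaded value when the
+  // condition from the CMP above holds.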
+
+  // Compare and exchange; this only succeeds if nothing has modified the
+  // memory location in the meantime.
+ MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MIB.addReg(t3);
+  assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperands");
+ (*MIB).addMemOperand(*F, *mInstr->memoperands_begin());
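+  // LCMPXCHG32 implicitly compares EAX with memory; on failure it reloads
+  // EAX with the current value and clears ZF, so the JNE below retries.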
+
+ MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg());
+ MIB.addReg(X86::EAX);
+
+ // insert branch
+ BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB);
+
+ F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
+
+MachineBasicBlock *
+X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+ case X86::CMOV_V1I64:
+ case X86::CMOV_FR32:
+ case X86::CMOV_FR64:
+ case X86::CMOV_V4F32:
+ case X86::CMOV_V2F64:
+ case X86::CMOV_V2I64: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ unsigned Opc =
+ X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ sinkMBB->transferSuccessors(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII->get(X86::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+ }
+
+ case X86::FP32_TO_INT16_IN_MEM:
+ case X86::FP32_TO_INT32_IN_MEM:
+ case X86::FP32_TO_INT64_IN_MEM:
+ case X86::FP64_TO_INT16_IN_MEM:
+ case X86::FP64_TO_INT32_IN_MEM:
+ case X86::FP64_TO_INT64_IN_MEM:
+ case X86::FP80_TO_INT16_IN_MEM:
+ case X86::FP80_TO_INT32_IN_MEM:
+ case X86::FP80_TO_INT64_IN_MEM: {
+ // Change the floating point control register to use "round towards zero"
+ // mode when truncating to an integer value.
+ MachineFunction *F = BB->getParent();
+ int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
+ addFrameReference(BuildMI(BB, dl, TII->get(X86::FNSTCW16m)), CWFrameIdx);
+
+    // Load the old value of the control word...
+ unsigned OldCW =
+ F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
+ addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16rm), OldCW),
+ CWFrameIdx);
+
+    // Set the rounding mode to round toward zero...
+ addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mi)), CWFrameIdx)
+ .addImm(0xC7F);
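+    // 0xC7F masks all FP exceptions and sets the rounding-control bits
+    // (11:10) to 11, i.e. round toward zero, which FP-to-int truncation
+    // requires.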
+
+ // Reload the modified control word now...
+ addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ // Restore the memory image of control word to original value
+ addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mr)), CWFrameIdx)
+ .addReg(OldCW);
+
+ // Get the X86 opcode to use.
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: assert(0 && "illegal opcode!");
+ case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
+ case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
+ case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
+ case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
+ case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
+ case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
+ case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
+ case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
+ case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
+ }
+
+ X86AddressMode AM;
+ MachineOperand &Op = MI->getOperand(0);
+ if (Op.isReg()) {
+ AM.BaseType = X86AddressMode::RegBase;
+ AM.Base.Reg = Op.getReg();
+ } else {
+ AM.BaseType = X86AddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = Op.getIndex();
+ }
+ Op = MI->getOperand(1);
+ if (Op.isImm())
+ AM.Scale = Op.getImm();
+ Op = MI->getOperand(2);
+ if (Op.isImm())
+ AM.IndexReg = Op.getImm();
+ Op = MI->getOperand(3);
+ if (Op.isGlobal()) {
+ AM.GV = Op.getGlobal();
+ } else {
+ AM.Disp = Op.getImm();
+ }
+ addFullAddress(BuildMI(BB, dl, TII->get(Opc)), AM)
+ .addReg(MI->getOperand(X86AddrNumOperands).getReg());
+
+ // Reload the original control word now.
+ addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+ }
+ case X86::ATOMAND32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+ X86::AND32ri, X86::MOV32rm,
+ X86::LCMPXCHG32, X86::MOV32rr,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
+ X86::OR32ri, X86::MOV32rm,
+ X86::LCMPXCHG32, X86::MOV32rr,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMXOR32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
+ X86::XOR32ri, X86::MOV32rm,
+ X86::LCMPXCHG32, X86::MOV32rr,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass);
+ case X86::ATOMNAND32:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+ X86::AND32ri, X86::MOV32rm,
+ X86::LCMPXCHG32, X86::MOV32rr,
+ X86::NOT32r, X86::EAX,
+ X86::GR32RegisterClass, true);
+ case X86::ATOMMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
+ case X86::ATOMMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
+ case X86::ATOMUMIN32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
+ case X86::ATOMUMAX32:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
+
+ case X86::ATOMAND16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
+ X86::AND16ri, X86::MOV16rm,
+ X86::LCMPXCHG16, X86::MOV16rr,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMOR16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
+ X86::OR16ri, X86::MOV16rm,
+ X86::LCMPXCHG16, X86::MOV16rr,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMXOR16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
+ X86::XOR16ri, X86::MOV16rm,
+ X86::LCMPXCHG16, X86::MOV16rr,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass);
+ case X86::ATOMNAND16:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
+ X86::AND16ri, X86::MOV16rm,
+ X86::LCMPXCHG16, X86::MOV16rr,
+ X86::NOT16r, X86::AX,
+ X86::GR16RegisterClass, true);
+ case X86::ATOMMIN16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
+ case X86::ATOMMAX16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
+ case X86::ATOMUMIN16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
+ case X86::ATOMUMAX16:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);
+
+ case X86::ATOMAND8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
+ X86::AND8ri, X86::MOV8rm,
+ X86::LCMPXCHG8, X86::MOV8rr,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMOR8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
+ X86::OR8ri, X86::MOV8rm,
+ X86::LCMPXCHG8, X86::MOV8rr,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMXOR8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
+ X86::XOR8ri, X86::MOV8rm,
+ X86::LCMPXCHG8, X86::MOV8rr,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass);
+ case X86::ATOMNAND8:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
+ X86::AND8ri, X86::MOV8rm,
+ X86::LCMPXCHG8, X86::MOV8rr,
+ X86::NOT8r, X86::AL,
+ X86::GR8RegisterClass, true);
+ // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
+  // This group is for a 64-bit host.
+ case X86::ATOMAND64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
+ X86::AND64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64, X86::MOV64rr,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMOR64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
+ X86::OR64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64, X86::MOV64rr,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMXOR64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
+ X86::XOR64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64, X86::MOV64rr,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass);
+ case X86::ATOMNAND64:
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
+ X86::AND64ri32, X86::MOV64rm,
+ X86::LCMPXCHG64, X86::MOV64rr,
+ X86::NOT64r, X86::RAX,
+ X86::GR64RegisterClass, true);
+ case X86::ATOMMIN64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
+ case X86::ATOMMAX64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
+ case X86::ATOMUMIN64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
+ case X86::ATOMUMAX64:
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);
+
+ // This group does 64-bit operations on a 32-bit host.
+ case X86::ATOMAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ false);
+ case X86::ATOMOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::OR32rr, X86::OR32rr,
+ X86::OR32ri, X86::OR32ri,
+ false);
+ case X86::ATOMXOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::XOR32rr, X86::XOR32rr,
+ X86::XOR32ri, X86::XOR32ri,
+ false);
+ case X86::ATOMNAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ true);
+ case X86::ATOMADD6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::ADD32rr, X86::ADC32rr,
+ X86::ADD32ri, X86::ADC32ri,
+ false);
+ case X86::ATOMSUB6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::SUB32rr, X86::SBB32rr,
+ X86::SUB32ri, X86::SBB32ri,
+ false);
+ case X86::ATOMSWAP6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::MOV32rr, X86::MOV32rr,
+ X86::MOV32ri, X86::MOV32ri,
+ false);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ unsigned Opc = Op.getOpcode();
+ assert((Opc >= ISD::BUILTIN_OP_END ||
+ Opc == ISD::INTRINSIC_WO_CHAIN ||
+ Opc == ISD::INTRINSIC_W_CHAIN ||
+ Opc == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+ switch (Opc) {
+ default: break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::SMUL:
+ case X86ISD::UMUL:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ // These nodes' second result is a boolean.
+ if (Op.getResNo() == 0)
+ break;
+ // Fallthrough
+ case X86ISD::SETCC:
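+    // SETCC and the flag results above produce only 0 or 1, so every bit
+    // except the least significant one is known to be zero.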
+ KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
+ Mask.getBitWidth() - 1);
+ break;
+ }
+}
+
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool X86TargetLowering::isGAPlusOffset(SDNode *N,
+ GlobalValue* &GA, int64_t &Offset) const{
+ if (N->getOpcode() == X86ISD::Wrapper) {
+ if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
+ GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
+ Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
+ return true;
+ }
+ }
+ return TargetLowering::isGAPlusOffset(N, GA, Offset);
+}
+
+static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
+ const TargetLowering &TLI) {
+ GlobalValue *GV;
+ int64_t Offset = 0;
+ if (TLI.isGAPlusOffset(Base, GV, Offset))
+ return (GV->getAlignment() >= N && (Offset % N) == 0);
+ // DAG combine handles the stack object case.
+ return false;
+}
+
+static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
+ MVT EVT, SDNode *&Base,
+ SelectionDAG &DAG, MachineFrameInfo *MFI,
+ const TargetLowering &TLI) {
+ Base = NULL;
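+  // Walk the shuffle elements; every defined element must be an in-order,
+  // non-extending load laid out consecutively after the first one (Base).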
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (N->getMaskElt(i) < 0) {
+ if (!Base)
+ return false;
+ continue;
+ }
+
+ SDValue Elt = DAG.getShuffleScalarElt(N, i);
+ if (!Elt.getNode() ||
+ (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
+ return false;
+ if (!Base) {
+ Base = Elt.getNode();
+ if (Base->getOpcode() == ISD::UNDEF)
+ return false;
+ continue;
+ }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+
+ if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
+ EVT.getSizeInBits()/8, i, MFI))
+ return false;
+ }
+ return true;
+}
+
+/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
+/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
+/// if the load addresses are consecutive, non-overlapping, and in the right
+/// order. In the case of v2i64, it will see if it can rewrite the
+/// shuffle to be an appropriate build vector so it can take advantage of
+/// PerformBuildVectorCombine.
+static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ DebugLoc dl = N->getDebugLoc();
+ MVT VT = N->getValueType(0);
+ MVT EVT = VT.getVectorElementType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // For x86-32 machines, if we see an insert and then a shuffle in a v2i64
+  // where the upper half is 0, it is advantageous to rewrite it as a build
+  // vector of (val, 0) so it can use movq.
+ if (VT == MVT::v2i64) {
+ SDValue In[2];
+ In[0] = N->getOperand(0);
+ In[1] = N->getOperand(1);
+ int Idx0 = SVN->getMaskElt(0);
+ int Idx1 = SVN->getMaskElt(1);
+ // FIXME: can we take advantage of undef index?
+ if (Idx0 >= 0 && Idx1 >= 0 &&
+ In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) {
+ ConstantSDNode* InsertVecIdx =
+ dyn_cast<ConstantSDNode>(In[Idx0/2].getOperand(2));
+ if (InsertVecIdx &&
+ InsertVecIdx->getZExtValue() == (unsigned)(Idx0 % 2) &&
+ isZeroNode(In[Idx1/2].getOperand(Idx1 % 2))) {
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ In[Idx0/2].getOperand(1),
+ In[Idx1/2].getOperand(Idx1 % 2));
+ }
+ }
+ }
+
+ // Try to combine a vector_shuffle into a 128-bit load.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ SDNode *Base = NULL;
+ if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, Base, DAG, MFI, TLI))
+ return SDValue();
+
+ LoadSDNode *LD = cast<LoadSDNode>(Base);
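+  // If the first load is known 16-byte aligned, omit the alignment so the
+  // new 128-bit load uses its default (natural) alignment; otherwise carry
+  // over the original load's (possibly smaller) alignment.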
+ if (isBaseAlignmentOfN(16, Base->getOperand(1).getNode(), TLI))
+ return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile());
+ return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile(), LD->getAlignment());
+}
+
+/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
+static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget,
+ const TargetLowering &TLI) {
+ unsigned NumOps = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
+
+ // Ignore single operand BUILD_VECTOR.
+ if (NumOps == 1)
+ return SDValue();
+
+ MVT VT = N->getValueType(0);
+ MVT EVT = VT.getVectorElementType();
+ if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+ // We are looking for load i64 and zero extend. We want to transform
+    // it before the legalizer has a chance to expand it. Also look for i64
+ // BUILD_PAIR bit casted to f64.
+ return SDValue();
+ // This must be an insertion into a zero vector.
+ SDValue HighElt = N->getOperand(1);
+ if (!isZeroNode(HighElt))
+ return SDValue();
+
+ // Value must be a load.
+ SDNode *Base = N->getOperand(0).getNode();
+ if (!isa<LoadSDNode>(Base)) {
+ if (Base->getOpcode() != ISD::BIT_CONVERT)
+ return SDValue();
+ Base = Base->getOperand(0).getNode();
+ if (!isa<LoadSDNode>(Base))
+ return SDValue();
+ }
+
+ // Transform it into VZEXT_LOAD addr.
+ LoadSDNode *LD = cast<LoadSDNode>(Base);
+
+ // Load must not be an extload.
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+
+  // The load type should be a legal type so we don't have to legalize it.
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
+ SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
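+  // Reroute users of the original load's chain result to the VZEXT_LOAD's
+  // chain so the old load can be dropped.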
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
+ DCI.CommitTargetLoweringOpt(TLO);
+ return ResNode;
+}
+
+/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
+static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue Cond = N->getOperand(0);
+ // Get the LHS/RHS of the select.
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+
+ // If we have SSE[12] support, try to form min/max nodes.
+ if (Subtarget->hasSSE2() &&
+ (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
+ Cond.getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+ unsigned Opcode = 0;
+ if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
+ switch (CC) {
+ default: break;
+ case ISD::SETOLE: // (X <= Y) ? X : Y -> min
+ case ISD::SETULE:
+ case ISD::SETLE:
+ if (!UnsafeFPMath) break;
+ // FALL THROUGH.
+ case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
+ case ISD::SETLT:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETOGT: // (X > Y) ? X : Y -> max
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ if (!UnsafeFPMath) break;
+ // FALL THROUGH.
+ case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
+ case ISD::SETGE:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
+ switch (CC) {
+ default: break;
+ case ISD::SETOGT: // (X > Y) ? Y : X -> min
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ if (!UnsafeFPMath) break;
+ // FALL THROUGH.
+ case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
+ case ISD::SETGE:
+ Opcode = X86ISD::FMIN;
+ break;
+
+ case ISD::SETOLE: // (X <= Y) ? Y : X -> max
+ case ISD::SETULE:
+ case ISD::SETLE:
+ if (!UnsafeFPMath) break;
+ // FALL THROUGH.
+ case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
+ case ISD::SETLT:
+ Opcode = X86ISD::FMAX;
+ break;
+ }
+ }
+
+ if (Opcode)
+ return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
+ }
+
+ // If this is a select between two integer constants, try to do some
+ // optimizations.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
+ // Don't do this for crazy integer types.
+ if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
+        // If this is efficiently invertible, canonicalize the TrueC/FalseC values
+ // so that TrueC (the true value) is larger than FalseC.
+ bool NeedsCondInvert = false;
+
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
+ // Efficiently invertible.
+ (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
+ (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
+ isa<ConstantSDNode>(Cond.getOperand(1))))) {
+ NeedsCondInvert = true;
+ std::swap(TrueC, FalseC);
+ }
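+        // e.g. (Cond ? 0 : 8) is handled as (!Cond ? 8 : 0): swap the
+        // constants here and XOR the condition with 1 where it is used below.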
+
+ // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
+ if (FalseC->getAPIntValue() == 0 &&
+ TrueC->getAPIntValue().isPowerOf2()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ }
+
+        // Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ return Cond;
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
+static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // If the flag operand isn't dead, don't touch this CMOV.
+ if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
+ return SDValue();
+
+ // If this is a select between two integer constants, try to do some
+ // optimizations. Note that the operands are ordered the opposite of SELECT
+ // operands.
+ if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
+ // larger than FalseC (the false value).
+ X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
+
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
+ CC = X86::GetOppositeBranchCondition(CC);
+ std::swap(TrueC, FalseC);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0.
+ // This is efficient for any integer data type (including i8/i16) and
+ // shift amount.
+ if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
+ SDValue Cond = N->getOperand(3);
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(ShAmt, MVT::i8));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+      // Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst. This is efficient
+ // for any integer data type, including i8/i16.
+ if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
+ SDValue Cond = N->getOperand(3);
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
+ FalseC->getValueType(0), Cond);
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default: break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
+ SDValue Cond = N->getOperand(3);
+ Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
+ DAG.getConstant(CC, MVT::i8), Cond);
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
+ Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ if (N->getNumValues() == 2) // Dead flag value?
+ return DCI.CombineTo(N, Cond, SDValue());
+ return Cond;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
+
+/// PerformMulCombine - Optimize a single multiply with constant into two
+/// in order to implement it with two cheaper instructions, e.g.
+/// LEA + SHL, LEA + LEA.
+static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DAG.getMachineFunction().
+ getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ return SDValue();
+
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ MVT VT = N->getValueType(0);
+ if (VT != MVT::i64)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+ uint64_t MulAmt = C->getZExtValue();
+ if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
+ return SDValue();
+
+ uint64_t MulAmt1 = 0;
+ uint64_t MulAmt2 = 0;
+ if ((MulAmt % 9) == 0) {
+ MulAmt1 = 9;
+ MulAmt2 = MulAmt / 9;
+ } else if ((MulAmt % 5) == 0) {
+ MulAmt1 = 5;
+ MulAmt2 = MulAmt / 5;
+ } else if ((MulAmt % 3) == 0) {
+ MulAmt1 = 3;
+ MulAmt2 = MulAmt / 3;
+ }
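+  // e.g. 45 = 9 * 5 is lowered as two LEAs, and 24 = 3 * 8 as an LEA plus
+  // a shift.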
+ if (MulAmt2 &&
+ (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
+ DebugLoc DL = N->getDebugLoc();
+
+ if (isPowerOf2_64(MulAmt2) &&
+ !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
+      // If the second multiplier is pow2, issue it first. We want the multiply by
+ // 3, 5, or 9 to be folded into the addressing mode unless the lone use
+ // is an add.
+ std::swap(MulAmt1, MulAmt2);
+
+ SDValue NewMul;
+ if (isPowerOf2_64(MulAmt1))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(MulAmt1, VT));
+
+ if (isPowerOf2_64(MulAmt2))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
+ DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
+ DAG.getConstant(MulAmt2, VT));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, NewMul, false);
+ }
+ return SDValue();
+}
+
+
+/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
+/// when possible.
+static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ // On X86 with SSE2 support, we can transform this to a vector shift if
+ // all elements are shifted by the same amount. We can't do this in legalize
+  // because a constant vector is typically transformed into a constant-pool
+  // load, so we have no knowledge of the shift amount.
+ if (!Subtarget->hasSSE2())
+ return SDValue();
+
+ MVT VT = N->getValueType(0);
+ if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
+ return SDValue();
+
+ SDValue ShAmtOp = N->getOperand(1);
+ MVT EltVT = VT.getVectorElementType();
+ DebugLoc DL = N->getDebugLoc();
+ SDValue BaseShAmt;
+ if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = ShAmtOp.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ for (; i != NumElts; ++i) {
+ SDValue Arg = ShAmtOp.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ if (Arg != BaseShAmt) {
+ return SDValue();
+ }
+ }
+ } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
+ DAG.getIntPtrConstant(0));
+ } else
+ return SDValue();
+
+ if (EltVT.bitsGT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
+ else if (EltVT.bitsLT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, BaseShAmt);
+
+ // The shift amount is identical so we can do a vector shift.
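+  // The pslli/psrli/psrai intrinsics used below take the count as a single
+  // i32 operand that is applied to every element.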
+ SDValue ValOp = N->getOperand(0);
+ switch (N->getOpcode()) {
+ default:
+ assert(0 && "Unknown shift opcode!");
+ break;
+ case ISD::SHL:
+ if (VT == MVT::v2i64)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ case ISD::SRA:
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ case ISD::SRL:
+ if (VT == MVT::v2i64)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v4i32)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
+ ValOp, BaseShAmt);
+ if (VT == MVT::v8i16)
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
+ ValOp, BaseShAmt);
+ break;
+ }
+ return SDValue();
+}
+
+/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
+static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
+ // the FP state in cases where an emms may be missing.
+ // A preferable solution to the general problem is to figure out the right
+ // places to insert EMMS. This qualifies as a quick hack.
+
+ // Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ MVT VT = St->getValue().getValueType();
+ if (VT.getSizeInBits() != 64)
+ return SDValue();
+
+ bool F64IsLegal = !UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE2();
+ if ((VT.isVector() ||
+ (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
+ isa<LoadSDNode>(St->getValue()) &&
+ !cast<LoadSDNode>(St->getValue())->isVolatile() &&
+ St->getChain().hasOneUse() && !St->isVolatile()) {
+ SDNode* LdVal = St->getValue().getNode();
+ LoadSDNode *Ld = 0;
+ int TokenFactorIndex = -1;
+ SmallVector<SDValue, 8> Ops;
+ SDNode* ChainVal = St->getChain().getNode();
+ // Must be a store of a load. We currently handle two cases: the load
+ // is a direct child, and it's under an intervening TokenFactor. It is
+ // possible to dig deeper under nested TokenFactors.
+ if (ChainVal == LdVal)
+ Ld = cast<LoadSDNode>(St->getChain());
+ else if (St->getValue().hasOneUse() &&
+ ChainVal->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
+ if (ChainVal->getOperand(i).getNode() == LdVal) {
+ TokenFactorIndex = i;
+ Ld = cast<LoadSDNode>(St->getValue());
+ } else
+ Ops.push_back(ChainVal->getOperand(i));
+ }
+ }
+
+ if (!Ld || !ISD::isNormalLoad(Ld))
+ return SDValue();
+
+ // If this is not the MMX case, i.e. we are just turning i64 load/store
+ // into f64 load/store, avoid the transformation if there are multiple
+ // uses of the loaded value.
+ if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
+ return SDValue();
+
+ DebugLoc LdDL = Ld->getDebugLoc();
+ DebugLoc StDL = N->getDebugLoc();
+ // If we are a 64-bit capable x86, lower to a single movq load/store pair.
+ // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
+ // pair instead.
+ if (Subtarget->is64Bit() || F64IsLegal) {
+ MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
+ SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(),
+ Ld->getBasePtr(), Ld->getSrcValue(),
+ Ld->getSrcValueOffset(), Ld->isVolatile(),
+ Ld->getAlignment());
+ SDValue NewChain = NewLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(NewChain);
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+ return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
+ St->getSrcValue(), St->getSrcValueOffset(),
+ St->isVolatile(), St->getAlignment());
+ }
+
+ // Otherwise, lower to two pairs of 32-bit loads / stores.
+ SDValue LoAddr = Ld->getBasePtr();
+ SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
+ DAG.getConstant(4, MVT::i32));
+
+ SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->isVolatile(), Ld->getAlignment());
+ SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
+ Ld->isVolatile(),
+ MinAlign(Ld->getAlignment(), 4));
+
+ SDValue NewChain = LoLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(LoLd);
+ Ops.push_back(HiLd);
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+
+ LoAddr = St->getBasePtr();
+ HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
+ DAG.getConstant(4, MVT::i32));
+
+ SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
+ St->getSrcValue(), St->getSrcValueOffset(),
+ St->isVolatile(), St->getAlignment());
+ SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
+ St->getSrcValue(),
+ St->getSrcValueOffset() + 4,
+ St->isVolatile(),
+ MinAlign(St->getAlignment(), 4));
+ return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
+ }
+ return SDValue();
+}
+
+/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
+/// X86ISD::FXOR nodes.
+static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
+ // F[X]OR(0.0, x) -> x
+ // F[X]OR(x, 0.0) -> x
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0);
+ return SDValue();
+}
+
+/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
+static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
+ // FAND(0.0, x) -> 0.0
+ // FAND(x, 0.0) -> 0.0
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0);
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1);
+ return SDValue();
+}
+
+static SDValue PerformBTCombine(SDNode *N,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // BT ignores high bits in the bit index operand.
+ SDValue Op1 = N->getOperand(1);
+ if (Op1.hasOneUse()) {
+ unsigned BitWidth = Op1.getValueSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
+ TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+ return SDValue();
+}
+
+SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
+ case ISD::BUILD_VECTOR:
+ return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this);
+ case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
+ case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
+ case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
+ case X86ISD::FXOR:
+ case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FAND: return PerformFANDCombine(N, DAG);
+ case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
+ }
+
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+X86TargetLowering::ConstraintType
+X86TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'A':
+ return C_Register;
+ case 'f':
+ case 'r':
+ case 'R':
+ case 'l':
+ case 'q':
+ case 'Q':
+ case 'x':
+ case 'y':
+ case 'Y':
+ return C_RegisterClass;
+ case 'e':
+ case 'Z':
+ return C_Other;
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *X86TargetLowering::
+LowerXConstraint(MVT ConstraintVT) const {
+ // FP X constraints get lowered to SSE1/2 registers if available, otherwise
+ // 'f' like normal targets.
+ if (ConstraintVT.isFloatingPoint()) {
+ if (Subtarget->hasSSE2())
+ return "Y";
+ if (Subtarget->hasSSE1())
+ return "x";
+ }
+
+ return TargetLowering::LowerXConstraint(ConstraintVT);
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ char Constraint,
+ bool hasMemory,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+
+ switch (Constraint) {
+ default: break;
+ case 'I':
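+    // 'I' accepts a constant in [0, 31] (e.g. 32-bit shift counts).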
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 31) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'J':
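+    // 'J' accepts a constant in [0, 63] (e.g. 64-bit shift counts).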
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 63) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'N':
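+    // 'N' accepts an unsigned 8-bit constant (for in/out instructions).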
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 255) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
+ case 'e': {
+ // 32-bit signed value
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ const ConstantInt *CI = C->getConstantIntValue();
+ if (CI->isValueValidForType(Type::Int32Ty, C->getSExtValue())) {
+ // Widen to 64 bits here to get it sign extended.
+ Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
+ break;
+ }
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
+ }
+ return;
+ }
+ case 'Z': {
+ // 32-bit unsigned value
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ const ConstantInt *CI = C->getConstantIntValue();
+ if (CI->isValueValidForType(Type::Int32Ty, C->getZExtValue())) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ // FIXME gcc accepts some relocatable values here too, but only in certain
+ // memory models; it's complicated.
+ return;
+ }
+ case 'i': {
+ // Literal immediates are always ok.
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
+ // Widen to 64 bits here to get it sign extended.
+ Result = DAG.getTargetConstant(CST->getSExtValue(), MVT::i64);
+ break;
+ }
+
+ // If we are in non-pic codegen mode, we allow the address of a global (with
+ // an optional displacement) to be used with 'i'.
+ GlobalAddressSDNode *GA = 0;
+ int64_t Offset = 0;
+
+ // Match either (GA), (GA+C), (GA+C1+C2), etc.
+ while (1) {
+ if ((GA = dyn_cast<GlobalAddressSDNode>(Op))) {
+ Offset += GA->getOffset();
+ break;
+ } else if (Op.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Offset += C->getZExtValue();
+ Op = Op.getOperand(0);
+ continue;
+ }
+ } else if (Op.getOpcode() == ISD::SUB) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Offset += -C->getZExtValue();
+ Op = Op.getOperand(0);
+ continue;
+ }
+ }
+
+ // Otherwise, this isn't something we can handle, reject it.
+ return;
+ }
+
+ if (hasMemory)
+ Op = LowerGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), Offset, DAG);
+ else
+ Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
+ Offset);
+ Result = Op;
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
+ Ops, DAG);
+}
+
+std::vector<unsigned> X86TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() == 1) {
+ // FIXME: not handling fp-stack yet!
+ switch (Constraint[0]) { // GCC X86 Constraint Letters
+ default: break; // Unknown constraint letter
+ case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
+ case 'Q': // Q_REGS
+ if (VT == MVT::i32)
+ return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
+ else if (VT == MVT::i16)
+ return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
+ else if (VT == MVT::i8)
+ return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
+ else if (VT == MVT::i64)
+ return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
+ break;
+ }
+ }
+
+ return std::vector<unsigned>();
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ // First, see if this is a constraint that directly corresponds to an LLVM
+ // register class.
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': // GENERAL_REGS
+ case 'R': // LEGACY_REGS
+ case 'l': // INDEX_REGS
+ if (VT == MVT::i8)
+ return std::make_pair(0U, X86::GR8RegisterClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16RegisterClass);
+ if (VT == MVT::i32 || !Subtarget->is64Bit())
+ return std::make_pair(0U, X86::GR32RegisterClass);
+ return std::make_pair(0U, X86::GR64RegisterClass);
+ case 'f': // FP Stack registers.
+ // If SSE is enabled for this VT, use f80 to ensure the isel moves the
+ // value to the correct fpstack register class.
+ if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, X86::RFP32RegisterClass);
+ if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, X86::RFP64RegisterClass);
+ return std::make_pair(0U, X86::RFP80RegisterClass);
+ case 'y': // MMX_REGS if MMX allowed.
+ if (!Subtarget->hasMMX()) break;
+ return std::make_pair(0U, X86::VR64RegisterClass);
+ case 'Y': // SSE_REGS if SSE2 allowed
+ if (!Subtarget->hasSSE2()) break;
+ // FALL THROUGH.
+ case 'x': // SSE_REGS if SSE1 allowed
+ if (!Subtarget->hasSSE1()) break;
+
+ switch (VT.getSimpleVT()) {
+ default: break;
+ // Scalar SSE types.
+ case MVT::f32:
+ case MVT::i32:
+ return std::make_pair(0U, X86::FR32RegisterClass);
+ case MVT::f64:
+ case MVT::i64:
+ return std::make_pair(0U, X86::FR64RegisterClass);
+ // Vector types.
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return std::make_pair(0U, X86::VR128RegisterClass);
+ }
+ break;
+ }
+ }
+
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ std::pair<unsigned, const TargetRegisterClass*> Res;
+ Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // Not found as a standard register?
+ if (Res.second == 0) {
+ // GCC calls "st(0)" just plain "st".
+ if (StringsEqualNoCase("{st}", Constraint)) {
+ Res.first = X86::ST0;
+ Res.second = X86::RFP80RegisterClass;
+ }
+ // 'A' means EAX + EDX.
+ if (Constraint == "A") {
+ Res.first = X86::EAX;
+ Res.second = X86::GRADRegisterClass;
+ }
+ return Res;
+ }
+
+ // Otherwise, check to see if this is a register class of the wrong value
+ // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
+ // turn into {ax},{dx}.
+ if (Res.second->hasType(VT))
+ return Res; // Correct type already, nothing to do.
+
+ // All of the single-register GCC register classes map their values onto
+ // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we
+ // really want an 8-bit or 32-bit register, map to the appropriate register
+ // class and return the appropriate register.
+ if (Res.second == X86::GR16RegisterClass) {
+ if (VT == MVT::i8) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::AL; break;
+ case X86::DX: DestReg = X86::DL; break;
+ case X86::CX: DestReg = X86::CL; break;
+ case X86::BX: DestReg = X86::BL; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR8RegisterClass;
+ }
+ } else if (VT == MVT::i32) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::EAX; break;
+ case X86::DX: DestReg = X86::EDX; break;
+ case X86::CX: DestReg = X86::ECX; break;
+ case X86::BX: DestReg = X86::EBX; break;
+ case X86::SI: DestReg = X86::ESI; break;
+ case X86::DI: DestReg = X86::EDI; break;
+ case X86::BP: DestReg = X86::EBP; break;
+ case X86::SP: DestReg = X86::ESP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR32RegisterClass;
+ }
+ } else if (VT == MVT::i64) {
+ unsigned DestReg = 0;
+ switch (Res.first) {
+ default: break;
+ case X86::AX: DestReg = X86::RAX; break;
+ case X86::DX: DestReg = X86::RDX; break;
+ case X86::CX: DestReg = X86::RCX; break;
+ case X86::BX: DestReg = X86::RBX; break;
+ case X86::SI: DestReg = X86::RSI; break;
+ case X86::DI: DestReg = X86::RDI; break;
+ case X86::BP: DestReg = X86::RBP; break;
+ case X86::SP: DestReg = X86::RSP; break;
+ }
+ if (DestReg) {
+ Res.first = DestReg;
+ Res.second = X86::GR64RegisterClass;
+ }
+ }
+ } else if (Res.second == X86::FR32RegisterClass ||
+ Res.second == X86::FR64RegisterClass ||
+ Res.second == X86::VR128RegisterClass) {
+ // Handle references to XMM physical registers that got mapped into the
+ // wrong class. This can happen with constraints like {xmm0} where the
+ // target independent register mapper will just pick the first match it can
+ // find, ignoring the required type.
+ if (VT == MVT::f32)
+ Res.second = X86::FR32RegisterClass;
+ else if (VT == MVT::f64)
+ Res.second = X86::FR64RegisterClass;
+ else if (X86::VR128RegisterClass->hasType(VT))
+ Res.second = X86::VR128RegisterClass;
+ }
+
+ return Res;
+}
+
+//===----------------------------------------------------------------------===//
+// X86 Widen vector type
+//===----------------------------------------------------------------------===//
+
+/// getWidenVectorType: given a vector type, returns the type to widen
+/// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+/// If there is no vector type that we want to widen to, returns MVT::Other.
+/// When and where to widen is target dependent based on the cost of
+/// scalarizing vs using the wider vector type.
+
+MVT X86TargetLowering::getWidenVectorType(MVT VT) const {
+ assert(VT.isVector());
+ if (isTypeLegal(VT))
+ return VT;
+
+ // TODO: In computeRegisterProperty, we can compute the list of legal vector
+ // type based on element type. This would speed up our search (though
+ // it may not be worth it since the size of the list is relatively
+ // small).
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+
+  // On X86, it makes sense to widen any vector wider than 1 element.
+ if (NElts <= 1)
+ return MVT::Other;
+
+ for (unsigned nVT = MVT::FIRST_VECTOR_VALUETYPE;
+ nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+
+ if (isTypeLegal(SVT) &&
+ SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts)
+ return SVT;
+ }
+ return MVT::Other;
+}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
new file mode 100644
index 000000000000..550f8bdf9b64
--- /dev/null
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -0,0 +1,705 @@
+//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that X86 uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ISELLOWERING_H
+#define X86ISELLOWERING_H
+
+#include "X86Subtarget.h"
+#include "X86RegisterInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
+namespace llvm {
+ namespace X86ISD {
+ // X86 Specific DAG Nodes
+ enum NodeType {
+ // Start the numbering where the builtin ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// BSF - Bit scan forward.
+ /// BSR - Bit scan reverse.
+ BSF,
+ BSR,
+
+ /// SHLD, SHRD - Double shift instructions. These correspond to
+ /// X86::SHLDxx and X86::SHRDxx instructions.
+ SHLD,
+ SHRD,
+
+ /// FAND - Bitwise logical AND of floating point values. This corresponds
+ /// to X86::ANDPS or X86::ANDPD.
+ FAND,
+
+ /// FOR - Bitwise logical OR of floating point values. This corresponds
+ /// to X86::ORPS or X86::ORPD.
+ FOR,
+
+ /// FXOR - Bitwise logical XOR of floating point values. This corresponds
+ /// to X86::XORPS or X86::XORPD.
+ FXOR,
+
+      /// FSRL - Bitwise logical right shift of floating point values. This
+      /// corresponds to X86::PSRLDQ.
+ FSRL,
+
+ /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+ /// integer source in memory and FP reg result. This corresponds to the
+ /// X86::FILD*m instructions. It has three inputs (token chain, address,
+ /// and source type) and two outputs (FP value and token chain). FILD_FLAG
+      /// also produces a flag.
+ FILD,
+ FILD_FLAG,
+
+ /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+ /// integer destination in memory and a FP reg source. This corresponds
+ /// to the X86::FIST*m instructions and the rounding mode change stuff. It
+ /// has two inputs (token chain and address) and two outputs (int value
+ /// and token chain).
+ FP_TO_INT16_IN_MEM,
+ FP_TO_INT32_IN_MEM,
+ FP_TO_INT64_IN_MEM,
+
+ /// FLD - This instruction implements an extending load to FP stack slots.
+ /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
+ /// operand, ptr to load from, and a ValueType node indicating the type
+ /// to load to.
+ FLD,
+
+ /// FST - This instruction implements a truncating store to FP stack
+ /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
+ /// chain operand, value to store, address, and a ValueType to store it
+ /// as.
+ FST,
+
+ /// CALL/TAILCALL - These operations represent an abstract X86 call
+ /// instruction, which includes a bunch of information. In particular the
+ /// operands of these nodes are:
+ ///
+ /// #0 - The incoming token chain
+ /// #1 - The callee
+ /// #2 - The number of arg bytes the caller pushes on the stack.
+ /// #3 - The number of arg bytes the callee pops off the stack.
+ /// #4 - The value to pass in AL/AX/EAX (optional)
+ /// #5 - The value to pass in DL/DX/EDX (optional)
+ ///
+ /// The result values of these nodes are:
+ ///
+ /// #0 - The outgoing token chain
+ /// #1 - The first register result value (optional)
+ /// #2 - The second register result value (optional)
+ ///
+ /// The CALL vs TAILCALL distinction boils down to whether the callee is
+ /// known not to modify the caller's stack frame, as is standard with
+ /// LLVM.
+ CALL,
+ TAILCALL,
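+
+ /// Illustrative example (an addition, not from the original comment):
+ /// for a cdecl call that pushes 8 bytes of arguments, the caller pops
+ /// its own arguments, so the node carries operand #2 = 8 and
+ /// operand #3 = 0; a stdcall callee that pops its own 8 bytes would
+ /// instead carry operand #2 = 8 and operand #3 = 8.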
+
+ /// RDTSC_DAG - This operation implements the lowering for
+ /// readcyclecounter
+ RDTSC_DAG,
+
+ /// X86 compare and logical compare instructions.
+ CMP, COMI, UCOMI,
+
+ /// X86 bit-test instructions.
+ BT,
+
+ /// X86 SetCC. Operand 0 is condition code, and operand 1 is the flag
+ /// operand produced by a CMP instruction.
+ SETCC,
+
+ /// X86 conditional moves. Operand 0 and operand 1 are the two values
+ /// to select from. Operand 2 is the condition code, and operand 3 is the
+ /// flag operand produced by a CMP or TEST instruction. It also writes a
+ /// flag result.
+ CMOV,
+
+ /// X86 conditional branches. Operand 0 is the chain operand, operand 1
+ /// is the block to branch if condition is true, operand 2 is the
+ /// condition code, and operand 3 is the flag operand produced by a CMP
+ /// or TEST instruction.
+ BRCOND,
+
+ /// Return with a flag operand. Operand 0 is the chain operand, operand
+ /// 1 is the number of bytes of stack to pop.
+ RET_FLAG,
+
+ /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
+ REP_STOS,
+
+ /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
+ REP_MOVS,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the popl
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// Wrapper - A wrapper node for TargetConstantPool,
+ /// TargetExternalSymbol, and TargetGlobalAddress.
+ Wrapper,
+
+ /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
+ /// relative displacements.
+ WrapperRIP,
+
+ /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRB.
+ PEXTRB,
+
+ /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
+ /// i32, corresponds to X86::PEXTRW.
+ PEXTRW,
+
+ /// INSERTPS - Insert any element of a 4 x float vector into any element
+ /// of a destination 4 x float vector.
+ INSERTPS,
+
+ /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRB.
+ PINSRB,
+
+ /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
+ /// corresponds to X86::PINSRW.
+ PINSRW,
+
+ /// PSHUFB - Shuffle 16 8-bit values within a vector.
+ PSHUFB,
+
+ /// FMAX, FMIN - Floating point max and min.
+ ///
+ FMAX, FMIN,
+
+ /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
+ /// approximation. Note that these typically require refinement
+ /// in order to obtain suitable precision.
+ FRSQRT, FRCP,
+
+ // TLSADDR - Thread Local Storage.
+ TLSADDR,
+
+ // SegmentBaseAddress - The address segment:0
+ SegmentBaseAddress,
+
+ // EH_RETURN - Exception Handling helpers.
+ EH_RETURN,
+
+ /// TC_RETURN - Tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN,
+
+ // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
+ LCMPXCHG_DAG,
+ LCMPXCHG8_DAG,
+
+ // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
+ // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
+ // Atomic 64-bit binary operations.
+ ATOMADD64_DAG,
+ ATOMSUB64_DAG,
+ ATOMOR64_DAG,
+ ATOMXOR64_DAG,
+ ATOMAND64_DAG,
+ ATOMNAND64_DAG,
+ ATOMSWAP64_DAG,
+
+ // FNSTCW16m - Store FP control word into i16 memory.
+ FNSTCW16m,
+
+ // VZEXT_MOVL - Vector move low and zero extend.
+ VZEXT_MOVL,
+
+ // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+ VZEXT_LOAD,
+
+ // VSHL, VSRL - Vector logical left / right shift.
+ VSHL, VSRL,
+
+ // CMPPD, CMPPS - Vector double/float comparison.
+ CMPPD, CMPPS,
+
+ // PCMP* - Vector integer comparisons.
+ PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
+ PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
+
+ // ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results.
+ ADD, SUB, SMUL, UMUL,
+ INC, DEC,
+
+ // MUL_IMM - X86 specific multiply by immediate.
+ MUL_IMM
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace X86 {
+ /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PSHUFD.
+ bool isPSHUFDMask(ShuffleVectorSDNode *N);
+
+ /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PSHUFHW.
+ bool isPSHUFHWMask(ShuffleVectorSDNode *N);
+
+ /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to PSHUFLW.
+ bool isPSHUFLWMask(ShuffleVectorSDNode *N);
+
+ /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to SHUFP*.
+ bool isSHUFPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
+ bool isMOVHLPSMask(ShuffleVectorSDNode *N);
+
+ /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+ /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+ /// <2, 3, 2, 3>
+ bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for MOVLP{S|D}.
+ bool isMOVLPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for MOVHP{S|D},
+ /// as well as MOVLHPS.
+ bool isMOVHPMask(ShuffleVectorSDNode *N);
+
+ /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to UNPCKL.
+ bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+
+ /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to UNPCKH.
+ bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
+
+ /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
+ /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
+ /// <0, 0, 1, 1>
+ bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
+ /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
+ /// <2, 2, 3, 3>
+ bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N);
+
+ /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSS,
+ /// MOVSD, and MOVD, i.e. setting the lowest element.
+ bool isMOVLMask(ShuffleVectorSDNode *N);
+
+ /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
+ bool isMOVSHDUPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
+ bool isMOVSLDUPMask(ShuffleVectorSDNode *N);
+
+ /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+ bool isMOVDDUPMask(ShuffleVectorSDNode *N);
+
+ /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
+ /// instructions.
+ unsigned getShuffleSHUFImmediate(SDNode *N);
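+
+ /// Worked example (added for illustration): PSHUFD and SHUFPS encode one
+ /// 2-bit source index per destination element, with element 0 in the low
+ /// bits, so the identity mask <0, 1, 2, 3> yields the immediate 0xE4
+ /// (binary 11 10 01 00) and the element-reversing mask <3, 2, 1, 0>
+ /// yields 0x1B.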
+
+ /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
+ /// instructions.
+ unsigned getShufflePSHUFHWImmediate(SDNode *N);
+
+ /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
+ /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
+ /// instructions.
+ unsigned getShufflePSHUFLWImmediate(SDNode *N);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // X86TargetLowering - X86 Implementation of the TargetLowering interface
+ class X86TargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ int RegSaveFrameIndex; // X86-64 vararg func register save area.
+ unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset.
+ unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset.
+ int BytesToPopOnReturn; // Number of arg bytes ret should pop.
+ int BytesCallerReserves; // Number of arg bytes caller makes.
+
+ public:
+ explicit X86TargetLowering(X86TargetMachine &TM);
+
+ /// getPICJumpTableRelocBase - Returns relocation base for the given PIC
+ /// jumptable.
+ SDValue getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const;
+
+ // Return the number of bytes that a function should pop when it returns (in
+ // addition to the space used by the return address).
+ //
+ unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
+
+ // Return the number of bytes that the caller reserves for arguments passed
+ // to this function.
+ unsigned getBytesCallerReserves() const { return BytesCallerReserves; }
+
+ /// getStackPtrReg - Return the stack pointer register we are using: either
+ /// ESP or RSP.
+ unsigned getStackPtrReg() const { return X86StackPtr; }
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area. For X86, aggregates
+ /// that contain SSE vectors are placed at 16-byte boundaries while the
+ /// rest are at 4-byte boundaries.
+ virtual unsigned getByValTypeAlignment(const Type *Ty) const;
+
+ /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// and store operations as a result of memset, memcpy, and memmove
+ /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
+ /// determining it.
+ virtual
+ MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
+ bool isSrcConst, bool isSrcStr) const;
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG);
+
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual MVT getSetCCResultType(MVT VT) const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// in Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual bool
+ isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const;
+
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG);
+
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ virtual const char *LowerXConstraint(MVT ConstraintVT) const;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
+ /// true it means one of the asm constraints of the inline asm instruction
+ /// being processed is 'm'.
+ virtual void LowerAsmOperandForConstraint(SDValue Op,
+ char ConstraintLetter,
+ bool hasMemory,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// getRegForInlineAsmConstraint - Given a physical register constraint
+ /// (e.g. {edx}), return the register number and the register class for the
+ /// register. This should only be used for C_Register constraints. On
+ /// error, this returns a register number of 0.
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
+ /// register EAX to i16 by referencing its sub-register AX.
+ virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isTruncateFree(MVT VT1, MVT VT2) const;
+
+ /// isZExtFree - Return true if any actual instruction that defines a
+ /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
+ /// register. This does not necessarily include registers defined in
+ /// unknown ways, such as incoming arguments, or copies from unknown
+ /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
+ /// does not necessarily apply to truncate instructions. e.g. on x86-64,
+ /// all instructions that define 32-bit values implicitly zero-extend the
+ /// result out to 64 bits.
+ virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
+ virtual bool isZExtFree(MVT VT1, MVT VT2) const;
+
+ /// isNarrowingProfitable - Return true if it's profitable to narrow
+ /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
+ /// from i32 to i8 but not from i32 to i16.
+ virtual bool isNarrowingProfitable(MVT VT1, MVT VT2) const;
+
+ /// isShuffleMaskLegal - Targets can use this to indicate that they only
+ /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+ /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
+ /// values are assumed to be legal.
+ virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
+ MVT VT) const;
+
+ /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. Targets can use
+ /// this to indicate if there is a suitable VECTOR_SHUFFLE that can be used
+ /// to replace a VAND with a constant pool entry.
+ virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+ MVT VT) const;
+
+ /// ShouldShrinkFPConstant - If true, then instruction selection should
+ /// seek to shrink the FP constant of the specified type to a smaller type
+ /// in order to save space and / or reduce runtime.
+ virtual bool ShouldShrinkFPConstant(MVT VT) const {
+ // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
+ // expensive than a straight movsd. On the other hand, it's important to
+ // shrink long double fp constant since fldt is very slow.
+ return !X86ScalarSSEf64 || VT == MVT::f80;
+ }
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets that want to do tail call
+ /// optimization should implement this function.
+ virtual bool IsEligibleForTailCallOptimization(CallSDNode *TheCall,
+ SDValue Ret,
+ SelectionDAG &DAG) const;
+
+ virtual const X86Subtarget* getSubtarget() {
+ return Subtarget;
+ }
+
+ /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
+ /// computed in an SSE register, not on the X87 floating point stack.
+ bool isScalarFPTypeInSSEReg(MVT VT) const {
+ return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 in SSE regs when SSE2 is available
+ (VT == MVT::f32 && X86ScalarSSEf32); // f32 in SSE regs when SSE1 is available
+ }
+
+ /// getWidenVectorType: given a vector type, returns the type to widen
+ /// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+ /// If there is no vector type that we want to widen to, returns MVT::Other.
+ /// When and where to widen is target dependent, based on the cost of
+ /// scalarizing vs. using the wider vector type.
+ virtual MVT getWidenVectorType(MVT VT) const;
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *
+ createFastISel(MachineFunction &mf,
+ MachineModuleInfo *mmi, DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &,
+ DenseMap<const BasicBlock *, MachineBasicBlock *> &,
+ DenseMap<const AllocaInst *, int> &
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &
+#endif
+ );
+
+ private:
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+ const X86RegisterInfo *RegInfo;
+ const TargetData *TD;
+
+ /// X86StackPtr - X86 physical register used as stack ptr.
+ unsigned X86StackPtr;
+
+ /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+ /// floating point ops.
+ /// When SSE is available, use it for f32 operations.
+ /// When SSE2 is available, use it for f64 operations.
+ bool X86ScalarSSEf32;
+ bool X86ScalarSSEf64;
+
+ SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG);
+
+ SDValue LowerMemArgument(SDValue Op, SelectionDAG &DAG,
+ const CCValAssign &VA, MachineFrameInfo *MFI,
+ unsigned CC, SDValue Root, unsigned i);
+
+ SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
+ const SDValue &StackPtr,
+ const CCValAssign &VA, SDValue Chain,
+ SDValue Arg, ISD::ArgFlagsTy Flags);
+
+ // Call lowering helpers.
+ bool IsCalleePop(bool isVarArg, unsigned CallingConv);
+ bool CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall);
+ bool CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall);
+ SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
+ SDValue Chain, bool IsTailCall, bool Is64Bit,
+ int FPDiff, DebugLoc dl);
+
+ CCAssignFn *CCAssignFnForNode(unsigned CallingConv) const;
+ NameDecorationStyle NameDecorationForFORMAL_ARGUMENTS(SDValue Op);
+ unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG);
+
+ std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool isSigned);
+
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
+ int64_t Offset, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerShift(SDValue Op, SelectionDAG &DAG);
+ SDValue BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain, SDValue StackSlot,
+ SelectionDAG &DAG);
+ SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFABS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG);
+
+ SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG);
+
+ void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG, unsigned NewOp);
+
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ const Value *DstSV, uint64_t DstSVOff);
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff);
+
+ /// Utility function to emit atomic bitwise operations (and, or, xor).
+ /// It takes the bitwise instruction to expand, the associated machine basic
+ /// block, and the associated X86 opcodes for reg/reg and reg/imm.
+ MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned regOpc,
+ unsigned immOpc,
+ unsigned loadOpc,
+ unsigned cxchgOpc,
+ unsigned copyOpc,
+ unsigned notOpc,
+ unsigned EAXreg,
+ TargetRegisterClass *RC,
+ bool invSrc = false) const;
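+
+ /// Rough shape of the expansion (a sketch inferred from the parameter
+ /// list above, not a verbatim description): the current value is loaded
+ /// with loadOpc, combined with the source via regOpc/immOpc (optionally
+ /// complemented through notOpc when invSrc is set), and a cxchgOpc
+ /// compare-and-swap against EAXreg retries in a loop until the memory
+ /// location is updated atomically.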
+
+ MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
+ MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned regOpcL,
+ unsigned regOpcH,
+ unsigned immOpcL,
+ unsigned immOpcH,
+ bool invSrc = false) const;
+
+ /// Utility function to emit atomic min and max. It takes the min/max
+ /// instruction to expand, the associated basic block, and the associated
+ /// cmov opcode for moving the min or max value.
+ MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
+ MachineBasicBlock *BB,
+ unsigned cmovOpc) const;
+
+ /// Emit nodes that will be selected as "test Op0,Op0", or something
+ /// equivalent, for use with the given x86 condition code.
+ SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG);
+
+ /// Emit nodes that will be selected as "cmp Op0,Op1", or something
+ /// equivalent, for use with the given x86 condition code.
+ SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ SelectionDAG &DAG);
+ };
+
+ namespace X86 {
+ FastISel *createFastISel(MachineFunction &mf,
+ MachineModuleInfo *mmi, DwarfWriter *dw,
+ DenseMap<const Value *, unsigned> &,
+ DenseMap<const BasicBlock *, MachineBasicBlock *> &,
+ DenseMap<const AllocaInst *, int> &
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &
+#endif
+ );
+ }
+}
+
+#endif // X86ISELLOWERING_H
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
new file mode 100644
index 000000000000..dc15e4aa4ee9
--- /dev/null
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -0,0 +1,1937 @@
+//====- X86Instr64bit.td - Describe X86-64 Instructions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86-64 instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// 64 bits, but only 32 bits are significant.
+def i64i32imm : Operand<i64>;
+// 64 bits, but only 8 bits are significant.
+def i64i8imm : Operand<i64>;
+
+def lea64mem : Operand<i64> {
+ let PrintMethod = "printlea64mem";
+ let MIOperandInfo = (ops GR64, i8imm, GR64, i32imm);
+}
+
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printlea64_32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+}
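+
+// Illustration (an assumption based on the usual X86 memory operand layout,
+// not stated above): the four sub-operands are base register, scale, index
+// register and displacement, so an address such as [rbx + 4*rcx + 8] would
+// be carried as base = RBX, scale = 4, index = RCX, disp = 8.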
+
+//===----------------------------------------------------------------------===//
+// Complex Pattern Definitions.
+//
+def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
+ [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper],
+ []>;
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments.
+//
+
+def i64immSExt8 : PatLeaf<(i64 imm), [{
+ // i64immSExt8 predicate - True if the 64-bit immediate fits in an 8-bit
+ // sign extended field.
+ return (int64_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+}]>;
+
+def i64immSExt32 : PatLeaf<(i64 imm), [{
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
+}]>;
+
+def i64immZExt32 : PatLeaf<(i64 imm), [{
+ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // zero extended field.
+ return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
+}]>;
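+
+// Worked examples (added for illustration): -128 (0xFFFFFFFFFFFFFF80)
+// satisfies i64immSExt8 because its low 8 bits sign-extend back to the full
+// value, while +128 does not and must use the i64immSExt32 form; the value
+// 0x00000000FFFFFFFF satisfies i64immZExt32 but not i64immSExt32.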
+
+def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
+
+def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>;
+def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
+
+def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
+def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
+def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
+
+//===----------------------------------------------------------------------===//
+// Instruction list...
+//
+
+// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [RSP, EFLAGS], Uses = [RSP] in {
+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In64BitMode]>;
+def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In64BitMode]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+
+ // NOTE: this pattern doesn't match "X86call imm", because we do not know
+ // that the offset between an arbitrary immediate and the call will fit in
+ // the 32-bit pcrel field that we have.
+ def CALL64pcrel32 : I<0xE8, RawFrm,
+ (outs), (ins i64i32imm:$dst, variable_ops),
+ "call\t${dst:call}", []>,
+ Requires<[In64BitMode]>;
+ def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call\t{*}$dst", [(X86call GR64:$dst)]>;
+ def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+ "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>;
+ }
+
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+def TCRETURNdi64 : I<0, Pseudo, (outs), (ins i64imm:$dst, i32imm:$offset,
+ variable_ops),
+ "#TC_RETURN $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64:$dst, i32imm:$offset,
+ variable_ops),
+ "#TC_RETURN $dst $offset",
+ []>;
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+ def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst),
+ "jmp{q}\t{*}$dst # TAILCALL",
+ []>;
+
+// Branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
+ [(brind GR64:$dst)]>;
+ def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
+ [(brind (loadi64 addr:$dst))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// EH Pseudo Instructions
+//
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1 in {
+def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR64:$addr)]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions...
+//
+let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
+def LEAVE64 : I<0xC9, RawFrm,
+ (outs), (ins), "leave", []>;
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
+let mayLoad = 1 in
+def POP64r : I<0x58, AddRegFrm,
+ (outs GR64:$reg), (ins), "pop{q}\t$reg", []>;
+let mayStore = 1 in
+def PUSH64r : I<0x50, AddRegFrm,
+ (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
+}
+
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in
+def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf", []>, REX_W;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in
+def PUSHFQ : I<0x9C, RawFrm, (outs), (ins), "pushf", []>;
+
+def LEA64_32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins lea64_32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
+
+let isReMaterializable = 1 in
+def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
+ "lea{q}\t{$src|$dst}, {$dst|$src}",
+ [(set GR64:$dst, lea64addr:$src)]>;
+
+let isTwoAddress = 1 in
+def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
+ "bswap{q}\t$dst",
+ [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+
+// Bit scan instructions.
+let Defs = [EFLAGS] in {
+def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86bsf GR64:$src)), (implicit EFLAGS)]>, TB;
+def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86bsf (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, TB;
+
+def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86bsr GR64:$src)), (implicit EFLAGS)]>, TB;
+def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (X86bsr (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, TB;
+} // Defs = [EFLAGS]
+
+// Repeat string ops
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI] in
+def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+ [(X86rep_movs i64)]>, REP;
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in
+def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+ [(X86rep_stos i64)]>, REP;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions...
+//
+
+let neverHasSideEffects = 1 in
+def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+ "movabs{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, imm:$src)]>;
+def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, i64immSExt32:$src)]>;
+}
+
+let canFoldAsLoad = 1 in
+def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (load addr:$src))]>;
+
+def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store GR64:$src, addr:$dst)]>;
+def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store i64immSExt32:$src, addr:$dst)]>;
+
+// Sign/Zero extenders
+
+// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
+// operand, which makes it a rare instruction with an 8-bit register
+// operand that can never access an h register. If support for h registers
+// were generalized, this would require a special register class.
+def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR8:$src))]>, TB;
+def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "movs{bq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
+def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR16:$src))]>, TB;
+def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movs{wq|x}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
+def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sext GR32:$src))]>;
+def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "movs{lq|xd}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
+
+// Use movzbl instead of movzbq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+ "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zext GR8:$src))]>, TB;
+def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+ "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+// Use movzwl instead of movzwq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zext GR16:$src))]>, TB;
+def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
+
+// There's no movzlq instruction, but movl can be used for this purpose, using
+// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
+// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
+// zero-extension; however, this isn't possible when the 32-bit value is
+// defined by a truncate or is copied from something where the high bits aren't
+// necessarily all zero. In such cases, we fall back to these explicit zext
+// instructions.
+def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zext GR32:$src))]>;
+def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
+
+// Any instruction that defines a 32-bit result zeroes the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may
+// be copying from a truncate, but any other 32-bit operation will zero-extend
+// up to 64 bits.
+def def32 : PatLeaf<(i32 GR32:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg;
+}]>;
+
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
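+
+// Example (illustrative): the result of a plain 32-bit add matches def32,
+// so (i64 (zext (add GR32:$a, GR32:$b))) can be selected as a 32-bit add
+// followed by the SUBREG_TO_REG pattern above with no extra instruction,
+// whereas a zext of a truncated value falls back to the explicit
+// MOVZX64rr32 defined earlier.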
+
+let neverHasSideEffects = 1 in {
+ let Defs = [RAX], Uses = [EAX] in
+ def CDQE : RI<0x98, RawFrm, (outs), (ins),
+ "{cltq|cdqe}", []>; // RAX = signext(EAX)
+
+ let Defs = [RAX,RDX], Uses = [RAX] in
+ def CQO : RI<0x99, RawFrm, (outs), (ins),
+ "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions...
+//
+
+let Defs = [EFLAGS] in {
+let isTwoAddress = 1 in {
+let isConvertibleToThreeAddress = 1 in {
+let isCommutable = 1 in
+// Register-Register Addition
+def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, GR64:$src2)),
+ (implicit EFLAGS)]>;
+
+// Register-Integer Addition
+def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+} // isConvertibleToThreeAddress
+
+// Register-Memory Addition
+def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+} // isTwoAddress
+
+// Memory-Register Addition
+def ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), GR64:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+def ADD64mi8 : RIi8<0x83, MRM0m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i64immSExt8:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
+ "add{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i64immSExt32:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+
+let Uses = [EFLAGS] in {
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>;
+
+def ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>;
+
+def ADC64ri8 : RIi8<0x83, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>;
+def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>;
+} // isTwoAddress
+
+def ADC64mr : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>;
+def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
+def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
+ "adc{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+} // Uses = [EFLAGS]
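+
+// Note (added for illustration): the adde patterns above consume the carry
+// produced in EFLAGS by a preceding add, which is how wider arithmetic is
+// lowered; e.g. a 128-bit add becomes ADD64rr on the low halves followed by
+// ADC64rr on the high halves.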
+
+let isTwoAddress = 1 in {
+// Register-Register Subtraction
+def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, GR64:$src2)),
+ (implicit EFLAGS)]>;
+
+// Register-Memory Subtraction
+def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+
+// Register-Integer Subtraction
+def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst),
+ (ins GR64:$src1, i64i8imm:$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
+ (ins GR64:$src1, i64i32imm:$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+} // isTwoAddress
+
+// Memory-Register Subtraction
+def SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), GR64:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// Memory-Integer Subtraction
+def SUB64mi8 : RIi8<0x83, MRM5m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i64immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
+ "sub{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i64immSExt32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+
+let Uses = [EFLAGS] in {
+let isTwoAddress = 1 in {
+def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>;
+
+def SBB64rm : RI<0x1B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>;
+
+def SBB64ri8 : RIi8<0x83, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>;
+def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
+} // isTwoAddress
+
+def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>;
+def SBB64mi8 : RIi8<0x83, MRM3m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
+def SBB64mi32 : RIi32<0x81, MRM3m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
+ "sbb{q}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+} // Uses = [EFLAGS]
+} // Defs = [EFLAGS]
+
+// Unsigned multiplication
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in {
+def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
+ "mul{q}\t$src", []>; // RAX,RDX = RAX*GR64
+let mayLoad = 1 in
+def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
+ "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+
+// Signed multiplication
+def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src),
+ "imul{q}\t$src", []>; // RAX,RDX = RAX*GR64
+let mayLoad = 1 in
+def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
+ "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+}
+
+let Defs = [EFLAGS] in {
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+// Register-Register Signed Integer Multiplication
+def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
+ (ins GR64:$src1, GR64:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, GR64:$src2)),
+ (implicit EFLAGS)]>, TB;
+
+// Register-Memory Signed Integer Multiplication
+def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$src1, i64mem:$src2),
+ "imul{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, TB;
+} // isTwoAddress
+
+// Surprisingly enough, these are not two address instructions!
+
+// Register-Integer Signed Integer Multiplication
+def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+
+// Memory-Integer Signed Integer Multiplication
+def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
+ (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul (load addr:$src1),
+ i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
+ (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, (mul (load addr:$src1),
+ i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Unsigned division / remainder
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in {
+def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), // RDX:RAX/r64 = RAX,RDX
+ "div{q}\t$src", []>;
+// Signed division / remainder
+def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), // RDX:RAX/r64 = RAX,RDX
+ "idiv{q}\t$src", []>;
+let mayLoad = 1 in {
+def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX
+ "div{q}\t$src", []>;
+def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), // RDX:RAX/[mem64] = RAX,RDX
+ "idiv{q}\t$src", []>;
+}
+}
+
+// Unary instructions
+let Defs = [EFLAGS], CodeSize = 2 in {
+let isTwoAddress = 1 in
+def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst",
+ [(set GR64:$dst, (ineg GR64:$src)),
+ (implicit EFLAGS)]>;
+def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
+ [(store (ineg (loadi64 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst",
+ [(set GR64:$dst, (add GR64:$src, 1)),
+ (implicit EFLAGS)]>;
+def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst",
+ [(set GR64:$dst, (add GR64:$src, -1)),
+ (implicit EFLAGS)]>;
+def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// In 64-bit mode, single byte INC and DEC cannot be encoded.
+let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in {
+// Can transform into LEA.
+def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst",
+ [(set GR16:$dst, (add GR16:$src, 1)),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src), "inc{l}\t$dst",
+ [(set GR32:$dst, (add GR32:$src, 1)),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src), "dec{w}\t$dst",
+ [(set GR16:$dst, (add GR16:$src, -1)),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst",
+ [(set GR32:$dst, (add GR32:$src, -1)),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+} // isConvertibleToThreeAddress
+
+// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
+// how to unfold them.
+let isTwoAddress = 0, CodeSize = 2 in {
+ def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+ def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+ def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+}
+} // Defs = [EFLAGS], CodeSize
+
+
+let Defs = [EFLAGS] in {
+// Shift instructions
+let isTwoAddress = 1 in {
+let Uses = [CL] in
+def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (shl GR64:$src, CL))]>;
+let isConvertibleToThreeAddress = 1 in // Can transform into LEA.
+def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "shl{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
+// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is
+// cheaper.
+} // isTwoAddress
+
+let Uses = [CL] in
+def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
+def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shl{q}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
+ "shl{q}\t$dst",
+ [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let Uses = [CL] in
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (srl GR64:$src, CL))]>;
+def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "shr{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
+def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
+ "shr{q}\t$dst",
+ [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+let Uses = [CL] in
+def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
+def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "shr{q}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
+ "shr{q}\t$dst",
+ [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let Uses = [CL] in
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (sra GR64:$src, CL))]>;
+def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "sar{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
+def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
+ "sar{q}\t$dst",
+ [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+let Uses = [CL] in
+def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
+def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "sar{q}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
+ "sar{q}\t$dst",
+ [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+// Rotate instructions
+let isTwoAddress = 1 in {
+let Uses = [CL] in
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotl GR64:$src, CL))]>;
+def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "rol{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
+ "rol{q}\t$dst",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+let Uses = [CL] in
+def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
+def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "rol{q}\t{$src, $dst|$dst, $src}",
+ [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
+ "rol{q}\t$dst",
+ [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+let isTwoAddress = 1 in {
+let Uses = [CL] in
+def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(set GR64:$dst, (rotr GR64:$src, CL))]>;
+def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+ "ror{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
+def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
+ "ror{q}\t$dst",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
+} // isTwoAddress
+
+let Uses = [CL] in
+def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
+def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
+ "ror{q}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
+ "ror{q}\t$dst",
+ [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+
+// Double shift instructions (generalizations of rotate)
+let isTwoAddress = 1 in {
+let Uses = [CL] in {
+def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, TB;
+def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, TB;
+}
+
+let isCommutable = 1 in { // FIXME: Update X86InstrInfo::commuteInstruction
+def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+} // isCommutable
+} // isTwoAddress
+
+let Uses = [CL] in {
+def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
+ addr:$dst)]>, TB;
+}
+def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+} // Defs = [EFLAGS]
+
+//===----------------------------------------------------------------------===//
+// Logical Instructions...
+//
+
+let isTwoAddress = 1, AddedComplexity = 15 in
+def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst",
+ [(set GR64:$dst, (not GR64:$src))]>;
+def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
+ [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
+
+let Defs = [EFLAGS] in {
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def AND64rr : RI<0x21, MRMDestReg,
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "and{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, GR64:$src2)),
+ (implicit EFLAGS)]>;
+def AND64rm : RI<0x23, MRMSrcMem,
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "and{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def AND64ri8 : RIi8<0x83, MRM4r,
+ (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "and{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def AND64ri32 : RIi32<0x81, MRM4r,
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "and{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (and GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+} // isTwoAddress
+
+def AND64mr : RI<0x21, MRMDestMem,
+ (outs), (ins i64mem:$dst, GR64:$src),
+ "and{q}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), GR64:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+def AND64mi8 : RIi8<0x83, MRM4m,
+ (outs), (ins i64mem:$dst, i64i8imm :$src),
+ "and{q}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), i64immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+def AND64mi32 : RIi32<0x81, MRM4m,
+ (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "and{q}\t{$src, $dst|$dst, $src}",
+ [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "or{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, GR64:$src2)),
+ (implicit EFLAGS)]>;
+def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "or{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "or{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "or{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+} // isTwoAddress
+
+def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "or{q}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), GR64:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+def OR64mi8 : RIi8<0x83, MRM1m, (outs), (ins i64mem:$dst, i64i8imm:$src),
+ "or{q}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "or{q}\t{$src, $dst|$dst, $src}",
+ [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 1 in {
+let isCommutable = 1 in
+def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "xor{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, GR64:$src2)),
+ (implicit EFLAGS)]>;
+def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "xor{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def XOR64ri8 : RIi8<0x83, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+ "xor{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, i64immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+def XOR64ri32 : RIi32<0x81, MRM6r,
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "xor{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (xor GR64:$src1, i64immSExt32:$src2)),
+ (implicit EFLAGS)]>;
+} // isTwoAddress
+
+def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "xor{q}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), GR64:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+def XOR64mi8 : RIi8<0x83, MRM6m, (outs), (ins i64mem:$dst, i64i8imm :$src),
+ "xor{q}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), i64immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "xor{q}\t{$src, $dst|$dst, $src}",
+ [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+// Integer comparison
+let Defs = [EFLAGS] in {
+let isCommutable = 1 in
+def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "test{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR64:$src1, GR64:$src2), 0),
+ (implicit EFLAGS)]>;
+def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
+ "test{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR64:$src1, (loadi64 addr:$src2)), 0),
+ (implicit EFLAGS)]>;
+def TEST64ri32 : RIi32<0xF7, MRM0r, (outs),
+ (ins GR64:$src1, i64i32imm:$src2),
+ "test{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR64:$src1, i64immSExt32:$src2), 0),
+ (implicit EFLAGS)]>;
+def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
+ (ins i64mem:$src1, i64i32imm:$src2),
+ "test{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0),
+ (implicit EFLAGS)]>;
+
+def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)]>;
+def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi64 addr:$src1), GR64:$src2),
+ (implicit EFLAGS)]>;
+def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)]>;
+def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)]>;
+def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)]>;
+def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi64 addr:$src1), i64immSExt8:$src2),
+ (implicit EFLAGS)]>;
+def CMP64mi32 : RIi32<0x81, MRM7m, (outs),
+ (ins i64mem:$src1, i64i32imm:$src2),
+ "cmp{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi64 addr:$src1), i64immSExt32:$src2),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Bit tests.
+// TODO: BTC, BTR, and BTS
+let Defs = [EFLAGS] in {
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)]>, TB;
+
+// Unlike with the register+register form, the memory+register form of the
+// bt instruction does not ignore the high bits of the index. From ISel's
+// perspective, this is pretty bizarre. Disable these instructions for now.
+//def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+// "bt{q}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi64 addr:$src1), GR64:$src2),
+// (implicit EFLAGS)]>, TB;
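+
+// For illustration only (not part of this patch): with a memory operand the
+// bit index is not masked to the operand width, so the access may fall
+// outside the addressed quadword. A rough C++ model of "bt mem64, reg64",
+// assuming a flat, byte-addressable view of memory:
+//
+//   bool bt_mem_reg(const unsigned char *Base, long long Index) {
+//     return (Base[Index >> 3] >> (Index & 7)) & 1;   // may read past Base[7]
+//   }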
+
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)]>, TB;
+// Note that these instructions don't need FastBTMem because that
+// only applies when the other operand is in a register. When it's
+// an immediate, bt is still fast.
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt (loadi64 addr:$src1), i64immSExt8:$src2),
+ (implicit EFLAGS)]>, TB;
+} // Defs = [EFLAGS]
+
+// Conditional moves
+let Uses = [EFLAGS], isTwoAddress = 1 in {
+let isCommutable = 1 in {
+def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_B, EFLAGS))]>, TB;
+def CMOVAE64rr: RI<0x43, MRMSrcReg, // if >=u, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_AE, EFLAGS))]>, TB;
+def CMOVE64rr : RI<0x44, MRMSrcReg, // if ==, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_E, EFLAGS))]>, TB;
+def CMOVNE64rr: RI<0x45, MRMSrcReg, // if !=, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NE, EFLAGS))]>, TB;
+def CMOVBE64rr: RI<0x46, MRMSrcReg, // if <=u, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_BE, EFLAGS))]>, TB;
+def CMOVA64rr : RI<0x47, MRMSrcReg, // if >u, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_A, EFLAGS))]>, TB;
+def CMOVL64rr : RI<0x4C, MRMSrcReg, // if <s, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_L, EFLAGS))]>, TB;
+def CMOVGE64rr: RI<0x4D, MRMSrcReg, // if >=s, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_GE, EFLAGS))]>, TB;
+def CMOVLE64rr: RI<0x4E, MRMSrcReg, // if <=s, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_LE, EFLAGS))]>, TB;
+def CMOVG64rr : RI<0x4F, MRMSrcReg, // if >s, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_G, EFLAGS))]>, TB;
+def CMOVS64rr : RI<0x48, MRMSrcReg, // if signed, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_S, EFLAGS))]>, TB;
+def CMOVNS64rr: RI<0x49, MRMSrcReg, // if !signed, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovns\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NS, EFLAGS))]>, TB;
+def CMOVP64rr : RI<0x4A, MRMSrcReg, // if parity, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovp\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_P, EFLAGS))]>, TB;
+def CMOVNP64rr : RI<0x4B, MRMSrcReg, // if !parity, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NP, EFLAGS))]>, TB;
+def CMOVO64rr : RI<0x40, MRMSrcReg, // if overflow, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_O, EFLAGS))]>, TB;
+def CMOVNO64rr : RI<0x41, MRMSrcReg, // if !overflow, GR64 = GR64
+ (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2,
+ X86_COND_NO, EFLAGS))]>, TB;
+} // isCommutable = 1
+
+def CMOVB64rm : RI<0x42, MRMSrcMem, // if <u, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_B, EFLAGS))]>, TB;
+def CMOVAE64rm: RI<0x43, MRMSrcMem, // if >=u, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_AE, EFLAGS))]>, TB;
+def CMOVE64rm : RI<0x44, MRMSrcMem, // if ==, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_E, EFLAGS))]>, TB;
+def CMOVNE64rm: RI<0x45, MRMSrcMem, // if !=, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NE, EFLAGS))]>, TB;
+def CMOVBE64rm: RI<0x46, MRMSrcMem, // if <=u, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_BE, EFLAGS))]>, TB;
+def CMOVA64rm : RI<0x47, MRMSrcMem, // if >u, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_A, EFLAGS))]>, TB;
+def CMOVL64rm : RI<0x4C, MRMSrcMem, // if <s, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_L, EFLAGS))]>, TB;
+def CMOVGE64rm: RI<0x4D, MRMSrcMem, // if >=s, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_GE, EFLAGS))]>, TB;
+def CMOVLE64rm: RI<0x4E, MRMSrcMem, // if <=s, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_LE, EFLAGS))]>, TB;
+def CMOVG64rm : RI<0x4F, MRMSrcMem, // if >s, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_G, EFLAGS))]>, TB;
+def CMOVS64rm : RI<0x48, MRMSrcMem, // if signed, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_S, EFLAGS))]>, TB;
+def CMOVNS64rm: RI<0x49, MRMSrcMem, // if !signed, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovns\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NS, EFLAGS))]>, TB;
+def CMOVP64rm : RI<0x4A, MRMSrcMem, // if parity, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovp\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_P, EFLAGS))]>, TB;
+def CMOVNP64rm : RI<0x4B, MRMSrcMem, // if !parity, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NP, EFLAGS))]>, TB;
+def CMOVO64rm : RI<0x40, MRMSrcMem, // if overflow, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_O, EFLAGS))]>, TB;
+def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64]
+ (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ X86_COND_NO, EFLAGS))]>, TB;
+} // isTwoAddress
+
+//===----------------------------------------------------------------------===//
+// Conversion Instructions...
+//
+
+// f64 -> signed i64
+def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (int_x86_sse2_cvtsd2si64 VR128:$src))]>;
+def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f128mem:$src),
+ "cvtsd2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (int_x86_sse2_cvtsd2si64
+ (load addr:$src)))]>;
+def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint FR64:$src))]>;
+def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src),
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
+def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (int_x86_sse2_cvttsd2si64 VR128:$src))]>;
+def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f128mem:$src),
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (int_x86_sse2_cvttsd2si64
+ (load addr:$src)))]>;
+
+// Signed i64 -> f64
+def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp GR64:$src))]>;
+def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
+
+let isTwoAddress = 1 in {
+def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
+ "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtsi642sd VR128:$src1,
+ GR64:$src2))]>;
+def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
+ "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvtsi642sd VR128:$src1,
+ (loadi64 addr:$src2)))]>;
+} // isTwoAddress
+
+// Signed i64 -> f32
+def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src),
+ "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp GR64:$src))]>;
+def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src),
+ "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
+
+let isTwoAddress = 1 in {
+ def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
+ "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse_cvtsi642ss VR128:$src1,
+ GR64:$src2))]>;
+ def Int_CVTSI2SS64rm : RSSI<0x2A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
+ "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse_cvtsi642ss VR128:$src1,
+ (loadi64 addr:$src2)))]>;
+}
+
+// f32 -> signed i64
+def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (int_x86_sse_cvtss2si64 VR128:$src))]>;
+def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
+ "cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (int_x86_sse_cvtss2si64
+ (load addr:$src)))]>;
+def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint FR32:$src))]>;
+def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
+def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (int_x86_sse_cvttss2si64 VR128:$src))]>;
+def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst,
+ (int_x86_sse_cvttss2si64 (load addr:$src)))]>;
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's
+// equivalent due to implicit zero-extending, and it sometimes has a smaller
+// encoding.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+// FIXME: AddedComplexity gives MOV64r0 a higher priority than MOV64ri32. Remove
+// when we have a better way to specify isel priority.
+let Defs = [EFLAGS], AddedComplexity = 1,
+ isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),
+ "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
+ [(set GR64:$dst, 0)]>;
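+
+// Illustrative sketch (not part of this patch) of the zero-extension rule the
+// comment above relies on: any write to a 32-bit GPR clears the upper 32 bits
+// of the full 64-bit register, so the 32-bit xor is sufficient.
+//
+//   uint64_t rax = 0xdeadbeefcafef00dULL;
+//   uint32_t eax = 0;             // models "xorl %eax, %eax"
+//   rax = (uint64_t)eax;          // upper 32 bits are implicitly zero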
+
+// Materialize i64 constant where top 32-bits are zero.
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
+ "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR64:$dst, i64immZExt32:$src)]>;
+
+//===----------------------------------------------------------------------===//
+// Thread Local Storage Instructions
+//===----------------------------------------------------------------------===//
+
+// All calls clobber the non-callee saved registers. RSP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in
+def TLS_addr64 : I<0, Pseudo, (outs), (ins i64imm:$sym),
+ ".byte\t0x66; "
+ "leaq\t${sym:mem}(%rip), %rdi; "
+ ".word\t0x6666; "
+ "rex64; "
+ "call\t__tls_get_addr@PLT",
+ [(X86tlsaddr tglobaltlsaddr:$sym)]>,
+ Requires<[In64BitMode]>;
+
+let AddedComplexity = 5 in
+def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "movq\t%gs:$src, $dst",
+ [(set GR64:$dst, (gsload addr:$src))]>, SegGS;
+
+let AddedComplexity = 5 in
+def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "movq\t%fs:$src, $dst",
+ [(set GR64:$dst, (fsload addr:$src))]>, SegFS;
+
+//===----------------------------------------------------------------------===//
+// Atomic Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [RAX, EFLAGS], Uses = [RAX] in {
+def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
+ "lock\n\t"
+ "cmpxchgq\t$swap,$ptr",
+ [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+}
+
+let Constraints = "$val = $dst" in {
+let Defs = [EFLAGS] in
+def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
+ "lock\n\t"
+ "xadd\t$val, $ptr",
+ [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+ TB, LOCK;
+def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
+ "xchg\t$val, $ptr",
+ [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
+}
+
+// Atomic exchange, and, or, xor
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+ usesCustomDAGSchedInserter = 1 in {
+def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>;
+def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>;
+def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMXOR64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>;
+def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMNAND64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>;
+def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val),
+ "#ATOMMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>;
+def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMIN64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>;
+def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val),
+ "#ATOMUMAX64 PSEUDO!",
+ [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
+def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
+ (MOV64ri tconstpool :$dst)>, Requires<[NotSmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
+ (MOV64ri tjumptable :$dst)>, Requires<[NotSmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+ (MOV64ri tglobaladdr :$dst)>, Requires<[NotSmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+ (MOV64ri texternalsym:$dst)>, Requires<[NotSmallCode]>;
+
+def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tconstpool:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tjumptable:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV64mi32 addr:$dst, texternalsym:$src)>,
+ Requires<[SmallCode, IsStatic]>;
+
+// Calls
+// Direct PC relative function call for small code model. 32-bit displacement
+// sign extended to 64-bit.
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>;
+
+def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86tailcall (i64 texternalsym:$dst)),
+ (CALL64pcrel32 texternalsym:$dst)>;
+
+def : Pat<(X86tailcall GR64:$dst),
+ (CALL64r GR64:$dst)>;
+
+
+// tailcall stuff
+def : Pat<(X86tailcall GR32:$dst),
+ (TAILCALL)>;
+def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
+ (TAILCALL)>;
+def : Pat<(X86tailcall (i64 texternalsym:$dst)),
+ (TAILCALL)>;
+
+def : Pat<(X86tcret GR64:$dst, imm:$off),
+ (TCRETURNri64 GR64:$dst, imm:$off)>;
+
+def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
+          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>;
+
+def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
+ (TCRETURNdi64 texternalsym:$dst, imm:$off)>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
+def : Pat<(parallel (X86cmp GR64:$src1, 0), (implicit EFLAGS)),
+ (TEST64rr GR64:$src1, GR64:$src1)>;
+
+// Conditional moves with folded loads with operands swapped and conditions
+// inverted.
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_B, EFLAGS),
+ (CMOVAE64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_AE, EFLAGS),
+ (CMOVB64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_E, EFLAGS),
+ (CMOVNE64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NE, EFLAGS),
+ (CMOVE64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_BE, EFLAGS),
+ (CMOVA64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_A, EFLAGS),
+ (CMOVBE64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_L, EFLAGS),
+ (CMOVGE64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_GE, EFLAGS),
+ (CMOVL64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_LE, EFLAGS),
+ (CMOVG64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_G, EFLAGS),
+ (CMOVLE64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_P, EFLAGS),
+ (CMOVNP64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NP, EFLAGS),
+ (CMOVP64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_S, EFLAGS),
+ (CMOVNS64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NS, EFLAGS),
+ (CMOVS64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_O, EFLAGS),
+ (CMOVNO64rm GR64:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NO, EFLAGS),
+ (CMOVO64rm GR64:$src2, addr:$src1)>;
+
+// zextload bool -> zextload byte
+def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+
+// extload
+// When extloading from 16-bit and smaller memory locations into 64-bit registers,
+// use zero-extending loads so that the entire 64-bit register is defined, avoiding
+// partial-register updates.
+def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
+def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
+// For other extloads, use subregs, since the high contents of the register are
+// defined after an extload.
+def : Pat<(extloadi64i32 addr:$src),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (MOV32rm addr:$src),
+ x86_subreg_32bit)>;
+def : Pat<(extloadi16i1 addr:$src),
+ (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
+ x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(extloadi16i8 addr:$src),
+ (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
+ x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+
+// anyext
+def : Pat<(i64 (anyext GR8:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>;
+def : Pat<(i64 (anyext GR16:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
+def : Pat<(i64 (anyext GR32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, x86_subreg_32bit)>;
+def : Pat<(i16 (anyext GR8:$src)),
+ (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext GR8:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
+def : Pat<(add GR64:$src1, 128),
+ (SUB64ri8 GR64:$src1, -128)>;
+def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
+ (SUB64mi8 addr:$dst, -128)>;
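+
+// A quick worked example (illustrative C++ only): both forms below compute
+// the same value, but only -128 fits the signed 8-bit immediate field, so the
+// sub form gets the shorter encoding. The same reasoning applies to the
+// 32-bit immediate patterns that follow.
+//
+//   uint64_t add128(uint64_t X) {
+//     int8_t Imm = -128;                    // representable; +128 is not
+//     return X - (uint64_t)(int64_t)Imm;    // same value as X + 128
+//   }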
+
+// The same trick applies for 32-bit immediate fields in 64-bit
+// instructions.
+def : Pat<(add GR64:$src1, 0x0000000080000000),
+ (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
+ (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+
+// r & (2^32-1) ==> movz
+def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
+ (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
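+
+// Source-level shape that triggers this peephole (illustrative C++ only):
+//
+//   uint64_t low32(uint64_t X) {
+//     return X & 0xFFFFFFFFu;   // selected as a 32-bit move, which zero-extends
+//   }
+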
+// r & (2^16-1) ==> movz
+def : Pat<(and GR64:$src, 0xffff),
+ (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR64:$src, 0xff),
+ (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>,
+ Requires<[In64BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>,
+ Requires<[In64BitMode]>;
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR64:$src, i32),
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+def : Pat<(sext_inreg GR64:$src, i16),
+ (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
+def : Pat<(sext_inreg GR64:$src, i8),
+ (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>,
+ Requires<[In64BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>,
+ Requires<[In64BitMode]>;
+
+// trunc patterns
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>;
+def : Pat<(i16 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>;
+def : Pat<(i8 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>,
+ Requires<[In64BitMode]>;
+
+// h-register tricks.
+// For now, be conservative on x86-64 and use an h-register extract only if the
+// value is immediately zero-extended or stored, which are somewhat common
+// cases. This uses a bunch of code to prevent a register requiring a REX prefix
+// from being allocated in the same instruction as the h register, as there's
+// currently no way to describe this requirement to the register allocator.
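+
+// The source-level shape involved, as an illustrative C++ sketch (not part of
+// this patch): extracting bits 15:8 of a value, which maps onto an h-register
+// such as %ah when the value lives in %rax.
+//
+//   uint8_t high_byte(uint64_t X) {
+//     return (uint8_t)((X >> 8) & 0xff);
+//   }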
+
+// h-register extract and zero-extend.
+def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR64:$src, GR64_ABCD),
+ x86_subreg_8bit_hi)),
+ x86_subreg_32bit)>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(srl_su GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi)),
+ x86_subreg_16bit)>,
+ Requires<[In64BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
+ (SUBREG_TO_REG
+ (i64 0),
+ (MOVZX32_NOREXrr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi)),
+ x86_subreg_32bit)>;
+
+// h-register extract and store.
+def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR64:$src, GR64_ABCD),
+ x86_subreg_8bit_hi))>;
+def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In64BitMode]>;
+def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
+ (MOV8mr_NOREX
+ addr:$dst,
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In64BitMode]>;
+
+// (shl x, 1) ==> (add x, x)
+def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+
+// (shl x (and y, 63)) ==> (shl x, y)
+def : Pat<(shl GR64:$src1, (and CL:$amt, 63)),
+ (SHL64rCL GR64:$src1)>;
+def : Pat<(store (shl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+ (SHL64mCL addr:$dst)>;
+
+def : Pat<(srl GR64:$src1, (and CL:$amt, 63)),
+ (SHR64rCL GR64:$src1)>;
+def : Pat<(store (srl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+ (SHR64mCL addr:$dst)>;
+
+def : Pat<(sra GR64:$src1, (and CL:$amt, 63)),
+ (SAR64rCL GR64:$src1)>;
+def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+ (SAR64mCL addr:$dst)>;
+
+// (or (x >> c) | (y << (64 - c))) ==> (shrd64 x, y, c)
+def : Pat<(or (srl GR64:$src1, CL:$amt),
+ (shl GR64:$src2, (sub 64, CL:$amt))),
+ (SHRD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (srl (loadi64 addr:$dst), CL:$amt),
+ (shl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
+ (SHRD64mrCL addr:$dst, GR64:$src2)>;
+
+def : Pat<(or (srl GR64:$src1, (i8 (trunc RCX:$amt))),
+ (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+ (SHRD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (srl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
+ (shl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+ addr:$dst),
+ (SHRD64mrCL addr:$dst, GR64:$src2)>;
+
+def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
+ (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1),
+ GR64:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
+
+// (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
+def : Pat<(or (shl GR64:$src1, CL:$amt),
+ (srl GR64:$src2, (sub 64, CL:$amt))),
+ (SHLD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (shl (loadi64 addr:$dst), CL:$amt),
+ (srl GR64:$src2, (sub 64, CL:$amt))), addr:$dst),
+ (SHLD64mrCL addr:$dst, GR64:$src2)>;
+
+def : Pat<(or (shl GR64:$src1, (i8 (trunc RCX:$amt))),
+ (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+ (SHLD64rrCL GR64:$src1, GR64:$src2)>;
+
+def : Pat<(store (or (shl (loadi64 addr:$dst), (i8 (trunc RCX:$amt))),
+ (srl GR64:$src2, (i8 (trunc (sub 64, RCX:$amt))))),
+ addr:$dst),
+ (SHLD64mrCL addr:$dst, GR64:$src2)>;
+
+def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
+ (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
+ GR64:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
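+
+// The double shift idioms these patterns recognize, sketched as C++ for
+// illustration (assumes 0 < C < 64; not part of this patch):
+//
+//   uint64_t funnel_shr(uint64_t X, uint64_t Y, unsigned C) {
+//     return (X >> C) | (Y << (64 - C));   // selected as shrd
+//   }
+//   uint64_t funnel_shl(uint64_t X, uint64_t Y, unsigned C) {
+//     return (X << C) | (Y >> (64 - C));   // selected as shld
+//   }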
+
+// X86 specific add which produces a flag.
+def : Pat<(addc GR64:$src1, GR64:$src2),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(addc GR64:$src1, (load addr:$src2)),
+ (ADD64rm GR64:$src1, addr:$src2)>;
+def : Pat<(addc GR64:$src1, i64immSExt8:$src2),
+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(addc GR64:$src1, i64immSExt32:$src2),
+          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+def : Pat<(subc GR64:$src1, GR64:$src2),
+ (SUB64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(subc GR64:$src1, (load addr:$src2)),
+ (SUB64rm GR64:$src1, addr:$src2)>;
+def : Pat<(subc GR64:$src1, i64immSExt8:$src2),
+ (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(subc GR64:$src1, i64immSExt32:$src2),
+ (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+//===----------------------------------------------------------------------===//
+// EFLAGS-defining Patterns
+//===----------------------------------------------------------------------===//
+
+// Register-Register Addition with EFLAGS result
+def : Pat<(parallel (X86add_flag GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Integer Addition with EFLAGS result
+def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Register-Memory Addition with EFLAGS result
+def : Pat<(parallel (X86add_flag GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)),
+ (ADD64rm GR64:$src1, addr:$src2)>;
+
+// Memory-Register Addition with EFLAGS result
+def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), GR64:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD64mr addr:$dst, GR64:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), i64immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD64mi8 addr:$dst, i64immSExt8:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), i64immSExt32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD64mi32 addr:$dst, i64immSExt32:$src2)>;
+
+// Register-Register Subtraction with EFLAGS result
+def : Pat<(parallel (X86sub_flag GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (SUB64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Memory Subtraction with EFLAGS result
+def : Pat<(parallel (X86sub_flag GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)),
+ (SUB64rm GR64:$src1, addr:$src2)>;
+
+// Register-Integer Subtraction with EFLAGS result
+def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Memory-Register Subtraction with EFLAGS result
+def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), GR64:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB64mr addr:$dst, GR64:$src2)>;
+
+// Memory-Integer Subtraction with EFLAGS result
+def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), i64immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB64mi8 addr:$dst, i64immSExt8:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), i64immSExt32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB64mi32 addr:$dst, i64immSExt32:$src2)>;
+
+// Register-Register Signed Integer Multiplication with EFLAGS result
+def : Pat<(parallel (X86smul_flag GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (IMUL64rr GR64:$src1, GR64:$src2)>;
+
+// Register-Memory Signed Integer Multiplication with EFLAGS result
+def : Pat<(parallel (X86smul_flag GR64:$src1, (loadi64 addr:$src2)),
+ (implicit EFLAGS)),
+ (IMUL64rm GR64:$src1, addr:$src2)>;
+
+// Register-Integer Signed Integer Multiplication with EFLAGS result
+def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Memory-Integer Signed Integer Multiplication with EFLAGS result
+def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
+def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt32:$src2),
+ (implicit EFLAGS)),
+ (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
+
+// INC and DEC with EFLAGS result. Note that these do not set CF.
+def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)),
+ (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC64_16m addr:$dst)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)),
+ (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC64_16m addr:$dst)>, Requires<[In64BitMode]>;
+
+def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)),
+ (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC64_32m addr:$dst)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)),
+ (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC64_32m addr:$dst)>, Requires<[In64BitMode]>;
+
+def : Pat<(parallel (X86inc_flag GR64:$src), (implicit EFLAGS)),
+ (INC64r GR64:$src)>;
+def : Pat<(parallel (store (i64 (X86inc_flag (loadi64 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC64m addr:$dst)>;
+def : Pat<(parallel (X86dec_flag GR64:$src), (implicit EFLAGS)),
+ (DEC64r GR64:$src)>;
+def : Pat<(parallel (store (i64 (X86dec_flag (loadi64 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC64m addr:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 SSE Instructions
+//===----------------------------------------------------------------------===//
+
+// Move instructions...
+
+def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))]>;
+def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ (iPTR 0)))]>;
+
+def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))]>;
+def MOV64toSDrm : RPDI<0x6E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+
+def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))]>;
+def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+
+//===----------------------------------------------------------------------===//
+// X86-64 SSE4.1 Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
+multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR64:$dst,
+ (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize, REX_W;
+}
+
+defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
+
+let isTwoAddress = 1 in {
+ multiclass SS41I_insert64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
+ OpSize, REX_W;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
+ imm:$src3)))]>, OpSize, REX_W;
+ }
+}
+
+defm PINSRQ : SS41I_insert64<0x22, "pinsrq">;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
new file mode 100644
index 000000000000..39504cd44cea
--- /dev/null
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -0,0 +1,168 @@
+//===-- X86InstrBuilder.h - Functions to aid building x86 insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle X86'isms in a clean way.
+//
+// The add* functions below may be used with the BuildMI function to add entire
+// memory references in a single, typed, function call. X86 memory references
+// can be very complex expressions (described in the README), so wrapping them
+// up behind an easier to use interface makes sense. Descriptions of the
+// functions are included below.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Base, Scale, Index, Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRBUILDER_H
+#define X86INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+/// X86AddressMode - This struct holds a generalized full x86 address mode.
+/// The base register can be a frame index, which will eventually be replaced
+/// with BP or SP; Disp is then adjusted by the offset of the frame object.
+/// The displacement may also include the offset of a global value.
+struct X86AddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FrameIndex;
+ } Base;
+
+ unsigned Scale;
+ unsigned IndexReg;
+ unsigned Disp;
+ GlobalValue *GV;
+
+ X86AddressMode() : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0) {
+ Base.Reg = 0;
+ }
+};
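+
+// Illustrative use (not part of this header): the address "DWORD PTR
+// [EAX + 4*EBX + 12]" would be described as follows. The register choices are
+// examples only.
+//
+//   X86AddressMode AM;
+//   AM.Base.Reg = X86::EAX;   // BaseType defaults to RegBase
+//   AM.Scale    = 4;
+//   AM.IndexReg = X86::EBX;
+//   AM.Disp     = 12;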
+
+/// addDirectMem - This function is used to add a direct memory reference to the
+/// current instruction -- that is, a dereference of an address in a register,
+/// with no scale, index or displacement. An example is: DWORD PTR [EAX].
+///
+inline const MachineInstrBuilder &addDirectMem(const MachineInstrBuilder &MIB,
+ unsigned Reg) {
+ // Because memory references are always represented with four
+ // values, this adds: Reg, [1, NoReg, 0] to the instruction.
+ return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0);
+}
+
+inline const MachineInstrBuilder &addLeaOffset(const MachineInstrBuilder &MIB,
+ int Offset) {
+ return MIB.addImm(1).addReg(0).addImm(Offset);
+}
+
+inline const MachineInstrBuilder &addOffset(const MachineInstrBuilder &MIB,
+ int Offset) {
+ return addLeaOffset(MIB, Offset).addReg(0);
+}
+
+/// addRegOffset - This function is used to add a memory reference of the form
+/// [Reg + Offset], i.e., one with no scale or index, but with a
+/// displacement. An example is: DWORD PTR [EAX + 4].
+///
+inline const MachineInstrBuilder &addRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill,
+ int Offset) {
+ return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
+}
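+
+// Example (a sketch; MBB, MI, DL, TII, DestReg and BaseReg are assumed to be
+// provided by the caller): load a 32-bit value from [BaseReg + 8].
+//
+//   addRegOffset(BuildMI(MBB, MI, DL, TII.get(X86::MOV32rm), DestReg),
+//                BaseReg, /*isKill=*/false, 8);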
+
+inline const MachineInstrBuilder &addLeaRegOffset(const MachineInstrBuilder &MIB,
+ unsigned Reg, bool isKill,
+ int Offset) {
+ return addLeaOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
+}
+
+/// addRegReg - This function is used to add a memory reference of the form:
+/// [Reg + Reg].
+inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
+ unsigned Reg1, bool isKill1,
+ unsigned Reg2, bool isKill2) {
+ return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1)
+ .addReg(Reg2, getKillRegState(isKill2)).addImm(0);
+}
+
+inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB,
+ const X86AddressMode &AM) {
+ assert (AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
+
+ if (AM.BaseType == X86AddressMode::RegBase)
+ MIB.addReg(AM.Base.Reg);
+ else if (AM.BaseType == X86AddressMode::FrameIndexBase)
+ MIB.addFrameIndex(AM.Base.FrameIndex);
+ else
+    assert(0 && "Unexpected base type in addLeaAddress!");
+ MIB.addImm(AM.Scale).addReg(AM.IndexReg);
+ if (AM.GV)
+ return MIB.addGlobalAddress(AM.GV, AM.Disp);
+ else
+ return MIB.addImm(AM.Disp);
+}
+
+inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
+ const X86AddressMode &AM) {
+ return addLeaAddress(MIB, AM).addReg(0);
+}
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. The
+/// reference initially uses the abstract FrameIndex as its base register; it
+/// is rewritten to a real base register and offset once the frame is laid
+/// out. A constant offset can be specified as well...
+///
+inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
+ MachineInstr *MI = MIB;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned Flags = 0;
+ if (TID.mayLoad())
+ Flags |= MachineMemOperand::MOLoad;
+ if (TID.mayStore())
+ Flags |= MachineMemOperand::MOStore;
+ MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FI),
+ Flags,
+ MFI.getObjectOffset(FI) + Offset,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ return addOffset(MIB.addFrameIndex(FI), Offset)
+ .addMemOperand(MMO);
+}
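+
+// Example (a sketch; the surrounding MBB, MI, DL and TII are assumed): spill
+// SrcReg into stack slot FI. The MachineMemOperand built above is attached
+// automatically.
+//
+//   addFrameReference(BuildMI(MBB, MI, DL, TII.get(X86::MOV32mr)), FI)
+//     .addReg(SrcReg, getKillRegState(true));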
+
+/// addConstantPoolReference - This function is used to add a reference to the
+/// base of a constant value spilled to the per-function constant pool. The
+/// reference uses the abstract ConstantPoolIndex which is retained until
+/// either machine code emission or assembly output. In PIC mode on x86-32,
+/// the GlobalBaseReg parameter can be used to make this a
+/// GlobalBaseReg-relative reference.
+///
+inline const MachineInstrBuilder &
+addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
+ unsigned GlobalBaseReg = 0) {
+ //FIXME: factor this
+ return MIB.addReg(GlobalBaseReg).addImm(1).addReg(0)
+ .addConstantPoolIndex(CPI).addReg(0);
+}
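+
+// Example (a sketch; CPI and GlobalBaseReg come from the caller): load a
+// double from the constant pool, PIC-relative when GlobalBaseReg is set.
+//
+//   addConstantPoolReference(BuildMI(MBB, MI, DL, TII.get(X86::MOVSDrm), DestReg),
+//                            CPI, GlobalBaseReg);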
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
new file mode 100644
index 000000000000..bc7def457c0f
--- /dev/null
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -0,0 +1,597 @@
+//==- X86InstrFPStack.td - Describe the X86 Instruction Set --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 x87 FPU instruction set, defining the
+// instructions, and properties of the instructions which are needed for code
+// generation, machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FPStack specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
+ SDTCisVT<1, f80>]>;
+def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
+ SDTCisVT<2, OtherVT>]>;
+def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+
+def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
+ [SDNPHasChain, SDNPMayLoad]>;
+def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
+ [SDNPHasChain, SDNPInFlag, SDNPMayStore]>;
+def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
+ [SDNPHasChain, SDNPMayLoad]>;
+def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
+ [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore]>;
+def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore]>;
+def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
+ [SDNPHasChain, SDNPMayStore]>;
+def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
+ [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>;
+
+//===----------------------------------------------------------------------===//
+// FPStack pattern fragments
+//===----------------------------------------------------------------------===//
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimmneg0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
+def fpimm1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+1.0);
+}]>;
+
+def fpimmneg1 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-1.0);
+}]>;
+
+// Some 'special' instructions
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+ def FP32_TO_INT16_IN_MEM : I<0, Pseudo,
+ (outs), (ins i16mem:$dst, RFP32:$src),
+ "##FP32_TO_INT16_IN_MEM PSEUDO!",
+ [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT32_IN_MEM : I<0, Pseudo,
+ (outs), (ins i32mem:$dst, RFP32:$src),
+ "##FP32_TO_INT32_IN_MEM PSEUDO!",
+ [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
+ def FP32_TO_INT64_IN_MEM : I<0, Pseudo,
+ (outs), (ins i64mem:$dst, RFP32:$src),
+ "##FP32_TO_INT64_IN_MEM PSEUDO!",
+ [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
+ def FP64_TO_INT16_IN_MEM : I<0, Pseudo,
+ (outs), (ins i16mem:$dst, RFP64:$src),
+ "##FP64_TO_INT16_IN_MEM PSEUDO!",
+ [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT32_IN_MEM : I<0, Pseudo,
+ (outs), (ins i32mem:$dst, RFP64:$src),
+ "##FP64_TO_INT32_IN_MEM PSEUDO!",
+ [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
+ def FP64_TO_INT64_IN_MEM : I<0, Pseudo,
+ (outs), (ins i64mem:$dst, RFP64:$src),
+ "##FP64_TO_INT64_IN_MEM PSEUDO!",
+ [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
+ def FP80_TO_INT16_IN_MEM : I<0, Pseudo,
+ (outs), (ins i16mem:$dst, RFP80:$src),
+ "##FP80_TO_INT16_IN_MEM PSEUDO!",
+ [(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
+ def FP80_TO_INT32_IN_MEM : I<0, Pseudo,
+ (outs), (ins i32mem:$dst, RFP80:$src),
+ "##FP80_TO_INT32_IN_MEM PSEUDO!",
+ [(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
+ def FP80_TO_INT64_IN_MEM : I<0, Pseudo,
+ (outs), (ins i64mem:$dst, RFP80:$src),
+ "##FP80_TO_INT64_IN_MEM PSEUDO!",
+ [(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
+}
+
+let isTerminator = 1 in
+ let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
+ def FP_REG_KILL : I<0, Pseudo, (outs), (ins), "##FP_REG_KILL", []>;
+
+// All FP Stack operations are represented with four instructions here. The
+// first three instructions, generated by the instruction selector, use "RFP32",
+// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
+// 64-bit or 80-bit floating point values. These sizes apply to the values,
+// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
+// copied to each other without losing information. These instructions are all
+// pseudo instructions and use the "_Fp" suffix.
+// In some cases there are additional variants with a mixture of different
+// register sizes.
+// The fourth instruction is defined with FPI, which is the actual instruction
+// emitted by the assembler. These use "RST" registers, although frequently
+// the actual register(s) used are implicit. These are always 80 bits.
+// The FP stackifier pass converts one to the other after register allocation
+// occurs.
+//
+// Note that the FpI instruction should have instruction selection info (e.g.
+// a pattern) and the FPI instruction should have emission info (e.g. opcode
+// encoding and asm printing info).
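+//
+// For example (illustrative): the instruction selector may emit ADD_Fp32, an
+// RFP32 pseudo created below by FPBinary_rr<fadd>, and the FP stackifier
+// later rewrites it to one of the real ST-register forms defined further
+// down, such as ADD_FST0r or ADD_FPrST0.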
+
+// Pseudo Instructions for FP stack return values.
+def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
+def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
+def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
+
+// FpGET_ST1* should only be issued *after* an FpGET_ST0* has been issued when
+// there are two values live out on the stack from a call or inlineasm. This
+// magic is handled by the stackifier. It is not valid to emit FpGET_ST1* and
+// then FpGET_ST0*. In addition, it is invalid for any FP-using operations to
+// occur between them.
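+// For example (illustrative), a call returning two FP values is retrieved as
+// FpGET_ST0_64 followed immediately by FpGET_ST1_64, with no other FP
+// instructions in between.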
+def FpGET_ST1_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
+def FpGET_ST1_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
+def FpGET_ST1_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
+
+let Defs = [ST0] in {
+def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(0) = FPR
+def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(0) = FPR
+def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(0) = FPR
+}
+
+let Defs = [ST1] in {
+def FpSET_ST1_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(1) = FPR
+def FpSET_ST1_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(1) = FPR
+def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR
+}
+
+// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
+// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
+// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
+// f80 instructions cannot use SSE and use neither of these.
+class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
+class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+ FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
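+// So, for example, on a target with SSE1 but not SSE2 the FpIf32 forms are
+// never selected (f32 arithmetic is handled by SSE), while the FpIf64 and
+// unpredicated FpI_ forms still are.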
+
+// Register copies. Just copies; the shortening ones do not truncate.
+let neverHasSideEffects = 1 in {
+ def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>;
+ def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>;
+ def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>;
+ def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>;
+ def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>;
+ def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>;
+ def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>;
+ def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>;
+ def MOV_Fp8080 : FpI_ <(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>;
+}
+
+// Factoring for arithmetic.
+multiclass FPBinary_rr<SDNode OpNode> {
+// Register op register -> register
+// These are separated out because they have no reversed form.
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
+ [(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
+ [(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
+ [(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
+}
+// The FopST0 series are not included here because of the irregularities
+// in where the 'r' goes in assembly output.
+// These instructions cannot address 80-bit memory.
+multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
+// ST(0) = ST(0) + [mem]
+def _Fp32m : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst,
+ (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
+def _Fp64m : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
+def _Fp64m32: FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst,
+ (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
+def _Fp80m32: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>;
+def _Fp80m64: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
+def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
+ !strconcat("f", !strconcat(asmstring, "{s}\t$src"))> { let mayLoad = 1; }
+def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
+ !strconcat("f", !strconcat(asmstring, "{l}\t$src"))> { let mayLoad = 1; }
+// ST(0) = ST(0) + [memint]
+def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src1,
+ (X86fild addr:$src2, i16)))]>;
+def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src1,
+ (X86fild addr:$src2, i32)))]>;
+def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
+ !strconcat("fi", !strconcat(asmstring, "{s}\t$src"))> { let mayLoad = 1; }
+def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
+ !strconcat("fi", !strconcat(asmstring, "{l}\t$src"))> { let mayLoad = 1; }
+}
+
+defm ADD : FPBinary_rr<fadd>;
+defm SUB : FPBinary_rr<fsub>;
+defm MUL : FPBinary_rr<fmul>;
+defm DIV : FPBinary_rr<fdiv>;
+defm ADD : FPBinary<fadd, MRM0m, "add">;
+defm SUB : FPBinary<fsub, MRM4m, "sub">;
+defm SUBR: FPBinary<fsub, MRM5m, "subr">;
+defm MUL : FPBinary<fmul, MRM1m, "mul">;
+defm DIV : FPBinary<fdiv, MRM6m, "div">;
+defm DIVR: FPBinary<fdiv, MRM7m, "divr">;
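+// Each defm above expands the multiclass with its name as a prefix; for
+// example, "defm ADD : FPBinary_rr<fadd>" produces ADD_Fp32, ADD_Fp64 and
+// ADD_Fp80, and "defm ADD : FPBinary<fadd, MRM0m, "add">" adds the memory
+// forms such as ADD_Fp32m together with the real ADD_F32m/ADD_F64m
+// instructions.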
+
+class FPST0rInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, D8;
+class FPrST0Inst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DC;
+class FPrST0PInst<bits<8> o, string asm>
+ : FPI<o, AddRegFrm, (outs), (ins RST:$op), asm>, DE;
+
+// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
+// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
+// we have to put some 'r's in and take them out of weird places.
+def ADD_FST0r : FPST0rInst <0xC0, "fadd\t$op">;
+def ADD_FrST0 : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, %ST(0)}">;
+def ADD_FPrST0 : FPrST0PInst<0xC0, "faddp\t$op">;
+def SUBR_FST0r : FPST0rInst <0xE8, "fsubr\t$op">;
+def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUB_FPrST0 : FPrST0PInst<0xE8, "fsub{r}p\t$op">;
+def SUB_FST0r : FPST0rInst <0xE0, "fsub\t$op">;
+def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p\t$op">;
+def MUL_FST0r : FPST0rInst <0xC8, "fmul\t$op">;
+def MUL_FrST0 : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, %ST(0)}">;
+def MUL_FPrST0 : FPrST0PInst<0xC8, "fmulp\t$op">;
+def DIVR_FST0r : FPST0rInst <0xF8, "fdivr\t$op">;
+def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIV_FPrST0 : FPrST0PInst<0xF8, "fdiv{r}p\t$op">;
+def DIV_FST0r : FPST0rInst <0xF0, "fdiv\t$op">;
+def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">;
+
+// Unary operations.
+multiclass FPUnary<SDNode OpNode, bits<8> opcode, string asmstring> {
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
+ [(set RFP32:$dst, (OpNode RFP32:$src))]>;
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
+ [(set RFP64:$dst, (OpNode RFP64:$src))]>;
+def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
+ [(set RFP80:$dst, (OpNode RFP80:$src))]>;
+def _F : FPI<opcode, RawFrm, (outs), (ins), asmstring>, D9;
+}
+
+defm CHS : FPUnary<fneg, 0xE0, "fchs">;
+defm ABS : FPUnary<fabs, 0xE1, "fabs">;
+defm SQRT: FPUnary<fsqrt,0xFA, "fsqrt">;
+defm SIN : FPUnary<fsin, 0xFE, "fsin">;
+defm COS : FPUnary<fcos, 0xFF, "fcos">;
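+// For instance, "defm CHS : FPUnary<fneg, 0xE0, "fchs">" defines the
+// CHS_Fp32/CHS_Fp64/CHS_Fp80 pseudos (used by the fpimmneg patterns at the
+// end of this file) plus the real CHS_F instruction, encoded as D9 E0.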
+
+let neverHasSideEffects = 1 in {
+def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
+def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
+def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
+}
+def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9;
+
+// Floating point cmovs.
+multiclass FPCMov<PatLeaf cc> {
+ def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
+ CondMovFP,
+ [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
+ cc, EFLAGS))]>;
+ def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
+ CondMovFP,
+ [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
+ cc, EFLAGS))]>;
+ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
+ CondMovFP,
+ [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
+ cc, EFLAGS))]>;
+}
+let Uses = [EFLAGS], isTwoAddress = 1 in {
+defm CMOVB : FPCMov<X86_COND_B>;
+defm CMOVBE : FPCMov<X86_COND_BE>;
+defm CMOVE : FPCMov<X86_COND_E>;
+defm CMOVP : FPCMov<X86_COND_P>;
+defm CMOVNB : FPCMov<X86_COND_AE>;
+defm CMOVNBE: FPCMov<X86_COND_A>;
+defm CMOVNE : FPCMov<X86_COND_NE>;
+defm CMOVNP : FPCMov<X86_COND_NP>;
+}
+
+// These are not factored because there's no clean way to pass DA/DB.
+def CMOVB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovb\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVBE_F : FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovbe\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmove\t{$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovu\t {$op, %st(0)|%ST(0), $op}">, DA;
+def CMOVNB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnb\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNBE_F: FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnbe\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovne\t{$op, %st(0)|%ST(0), $op}">, DB;
+def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
+ "fcmovnu\t{$op, %st(0)|%ST(0), $op}">, DB;
+
+// Floating point loads & stores.
+let canFoldAsLoad = 1 in {
+def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (loadf32 addr:$src))]>;
+let isReMaterializable = 1, mayHaveSideEffects = 1 in
+ def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (loadf64 addr:$src))]>;
+def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (loadf80 addr:$src))]>;
+}
+def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
+def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
+def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
+def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
+def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
+def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i16))]>;
+def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i32))]>;
+def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
+ [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
+
+def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
+ [(store RFP32:$src, addr:$op)]>;
+def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
+ [(truncstoref32 RFP64:$src, addr:$op)]>;
+def ST_Fp64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
+ [(store RFP64:$src, addr:$op)]>;
+def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
+ [(truncstoref32 RFP80:$src, addr:$op)]>;
+def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP,
+ [(truncstoref64 RFP80:$src, addr:$op)]>;
+// FST does not support 80-bit memory target; FSTP must be used.
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
+def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
+def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
+}
+def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
+ [(store RFP80:$src, addr:$op)]>;
+let mayStore = 1, neverHasSideEffects = 1 in {
+def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
+}
+
+let mayLoad = 1 in {
+def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
+def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
+def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
+def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
+def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
+def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
+}
+let mayStore = 1 in {
+def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
+def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
+def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
+def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">;
+def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst">;
+def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">;
+def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">;
+def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">;
+def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">;
+def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
+}
+
+// FISTTP requires SSE3 even though it's a FPStack op.
+def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP32:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i16mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i32mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
+ [(X86fp_to_i64mem RFP80:$src, addr:$op)]>,
+ Requires<[HasSSE3]>;
+
+let mayStore = 1 in {
+def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
+def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
+def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">;
+}
+
+// FP Stack manipulation instructions.
+def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op">, D9;
+def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op">, DD;
+def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op">, DD;
+def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op">, D9;
+
+// Floating point constant loads.
+let isReMaterializable = 1 in {
+def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
+ [(set RFP32:$dst, fpimm0)]>;
+def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
+ [(set RFP32:$dst, fpimm1)]>;
+def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
+ [(set RFP64:$dst, fpimm0)]>;
+def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
+ [(set RFP64:$dst, fpimm1)]>;
+def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
+ [(set RFP80:$dst, fpimm0)]>;
+def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
+ [(set RFP80:$dst, fpimm1)]>;
+}
+
+def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz">, D9;
+def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1">, D9;
+
+
+// Floating point compares.
+let Defs = [EFLAGS] in {
+def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ []>; // FPSW = cmp ST(0) with ST(i)
+
+def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ [(X86cmp RFP32:$lhs, RFP32:$rhs),
+ (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i)
+def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ [(X86cmp RFP64:$lhs, RFP64:$rhs),
+ (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i)
+def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+ [(X86cmp RFP80:$lhs, RFP80:$rhs),
+ (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i)
+}
+
+let Defs = [EFLAGS], Uses = [ST0] in {
+def UCOM_Fr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i)
+ (outs), (ins RST:$reg),
+ "fucom\t$reg">, DD;
+def UCOM_FPr : FPI<0xE8, AddRegFrm, // FPSW = cmp ST(0) with ST(i), pop
+ (outs), (ins RST:$reg),
+ "fucomp\t$reg">, DD;
+def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop
+ (outs), (ins),
+ "fucompp">, DA;
+
+def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i)
+ (outs), (ins RST:$reg),
+ "fucomi\t{$reg, %st(0)|%ST(0), $reg}">, DB;
+def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop
+ (outs), (ins RST:$reg),
+ "fucomip\t{$reg, %st(0)|%ST(0), $reg}">, DF;
+}
+
+// Floating point flag ops.
+let Defs = [AX] in
+def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags
+ (outs), (ins), "fnstsw", []>, DF;
+
+def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word
+ (outs), (ins i16mem:$dst), "fnstcw\t$dst",
+ [(X86fp_cwd_get16 addr:$dst)]>;
+
+let mayLoad = 1 in
+def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
+ (outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Required for RET of f32 / f64 / f80 values.
+def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
+def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
+def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
+
+// Required for CALL which return f32 / f64 / f80 values.
+def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op, RFP64:$src)>;
+def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op, RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op, RFP80:$src)>;
+def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op, RFP80:$src)>;
+
+// Floating point constant -0.0 and -1.0
+def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
+def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
+def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
+def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
+def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
+def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
+
+// Used to convert i64 to f64 since there isn't an SSE version of this op.
+def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
+
+// FP extensions map onto simple pseudo-value conversions if they are to/from
+// the FP stack.
+def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>,
+ Requires<[FPStackf64]>;
+
+// FP truncations map onto simple pseudo-value conversions if they are to/from
+// the FP stack. We have validated that only value-preserving truncations make
+// it through isel.
+def : Pat<(f32 (fround RFP64:$src)), (MOV_Fp6432 RFP64:$src)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f32 (fround RFP80:$src)), (MOV_Fp8032 RFP80:$src)>,
+ Requires<[FPStackf32]>;
+def : Pat<(f64 (fround RFP80:$src)), (MOV_Fp8064 RFP80:$src)>,
+ Requires<[FPStackf64]>;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
new file mode 100644
index 000000000000..eeed5bd27ff3
--- /dev/null
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -0,0 +1,285 @@
+//===- X86InstrFormats.td - X86 Instruction Formats --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+ bits<6> Value = val;
+}
+
+def Pseudo : Format<0>; def RawFrm : Format<1>;
+def AddRegFrm : Format<2>; def MRMDestReg : Format<3>;
+def MRMDestMem : Format<4>; def MRMSrcReg : Format<5>;
+def MRMSrcMem : Format<6>;
+def MRM0r : Format<16>; def MRM1r : Format<17>; def MRM2r : Format<18>;
+def MRM3r : Format<19>; def MRM4r : Format<20>; def MRM5r : Format<21>;
+def MRM6r : Format<22>; def MRM7r : Format<23>;
+def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>;
+def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>;
+def MRM6m : Format<30>; def MRM7m : Format<31>;
+def MRMInitReg : Format<32>;
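+// The MRM0r..MRM7r and MRM0m..MRM7m forms are for instructions that encode an
+// opcode extension (/0 through /7) in the reg field of the ModR/M byte, with a
+// register or memory operand respectively.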
+
+
+// ImmType - This specifies the immediate type used by an instruction. This is
+// part of the ad-hoc solution used to emit machine instruction encodings by our
+// machine code emitter.
+class ImmType<bits<3> val> {
+ bits<3> Value = val;
+}
+def NoImm : ImmType<0>;
+def Imm8 : ImmType<1>;
+def Imm16 : ImmType<2>;
+def Imm32 : ImmType<3>;
+def Imm64 : ImmType<4>;
+
+// FPFormat - This specifies what form this FP instruction has. This is used by
+// the Floating-Point stackifier pass.
+class FPFormat<bits<3> val> {
+ bits<3> Value = val;
+}
+def NotFP : FPFormat<0>;
+def ZeroArgFP : FPFormat<1>;
+def OneArgFP : FPFormat<2>;
+def OneArgFPRW : FPFormat<3>;
+def TwoArgFP : FPFormat<4>;
+def CompareFP : FPFormat<5>;
+def CondMovFP : FPFormat<6>;
+def SpecialFP : FPFormat<7>;
+
+// Prefix byte classes which are used to indicate to the ad-hoc machine code
+// emitter that various prefix bytes are required.
+class OpSize { bit hasOpSizePrefix = 1; }
+class AdSize { bit hasAdSizePrefix = 1; }
+class REX_W { bit hasREX_WPrefix = 1; }
+class LOCK { bit hasLockPrefix = 1; }
+class SegFS { bits<2> SegOvrBits = 1; }
+class SegGS { bits<2> SegOvrBits = 2; }
+class TB { bits<4> Prefix = 1; }
+class REP { bits<4> Prefix = 2; }
+class D8 { bits<4> Prefix = 3; }
+class D9 { bits<4> Prefix = 4; }
+class DA { bits<4> Prefix = 5; }
+class DB { bits<4> Prefix = 6; }
+class DC { bits<4> Prefix = 7; }
+class DD { bits<4> Prefix = 8; }
+class DE { bits<4> Prefix = 9; }
+class DF { bits<4> Prefix = 10; }
+class XD { bits<4> Prefix = 11; }
+class XS { bits<4> Prefix = 12; }
+class T8 { bits<4> Prefix = 13; }
+class TA { bits<4> Prefix = 14; }
+
+class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
+ string AsmStr>
+ : Instruction {
+ let Namespace = "X86";
+
+ bits<8> Opcode = opcod;
+ Format Form = f;
+ bits<6> FormBits = Form.Value;
+ ImmType ImmT = i;
+ bits<3> ImmTypeBits = ImmT.Value;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ string AsmString = AsmStr;
+
+ //
+ // Attributes specific to X86 instructions...
+ //
+ bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
+ bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
+
+ bits<4> Prefix = 0; // Which prefix byte does this inst have?
+  bit hasREX_WPrefix = 0;  // Does this inst require the REX.W prefix?
+ FPFormat FPForm; // What flavor of FP instruction is this?
+ bits<3> FPFormBits = 0;
+ bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
+ bits<2> SegOvrBits = 0; // Segment override prefix.
+}
+
+class I<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
+ : X86Inst<o, f, NoImm, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
+ : X86Inst<o, f, Imm8 , outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
+ : X86Inst<o, f, Imm16, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
+ : X86Inst<o, f, Imm32, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+// FPStack Instruction Templates:
+// FPI - Floating Point Instruction template.
+class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
+ : I<o, F, outs, ins, asm, []> {}
+
+// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
+class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
+ : X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
+ let FPForm = fp; let FPFormBits = FPForm.Value;
+ let Pattern = pattern;
+}
+
+// SSE1 Instruction Templates:
+//
+// SSI - SSE1 instructions with XS prefix.
+// PSI - SSE1 instructions with TB prefix.
+// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
+
+class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+
+// SSE2 Instruction Templates:
+//
+// SDI - SSE2 instructions with XD prefix.
+// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
+// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
+// PDI - SSE2 instructions with TB and OpSize prefixes.
+// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
+
+class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
+class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+
+// SSE3 Instruction Templates:
+//
+// S3I - SSE3 instructions with TB and OpSize prefixes.
+// S3SI - SSE3 instructions with XS prefix.
+// S3DI - SSE3 instructions with XD prefix.
+
+class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE3]>;
+class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE3]>;
+class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>;
+
+
+// SSSE3 Instruction Templates:
+//
+// SS38I - SSSE3 instructions with T8 prefix.
+// SS3AI - SSSE3 instructions with TA prefix.
+//
+// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
+// uses the MMX registers. We put those instructions here because they fit
+// the SSSE3 instruction category better than the MMX category.
+
+class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>;
+class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>;
+
+// SSE4.1 Instruction Templates:
+//
+// SS48I - SSE 4.1 instructions with T8 prefix.
+// SS4AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
+//
+class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>;
+class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>;
+
+// SSE4.2 Instruction Templates:
+//
+// SS428I - SSE 4.2 instructions with T8 prefix.
+class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
+
+// X86-64 Instruction templates...
+//
+
+class RI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, REX_W;
+class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii32<o, F, outs, ins, asm, pattern>, REX_W;
+
+class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm64, outs, ins, asm>, REX_W {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
+class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : SSI<o, F, outs, ins, asm, pattern>, REX_W;
+class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : SDI<o, F, outs, ins, asm, pattern>, REX_W;
+class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : PDI<o, F, outs, ins, asm, pattern>, REX_W;
+
+// MMX Instruction templates
+//
+
+// MMXI - MMX instructions with TB prefix.
+// MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode.
+// MMX2I - MMX / SSE2 instructions with TB and OpSize prefixes.
+// MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
+// MMXRI - MMX instructions with TB prefix and REX.W.
+// MMXID - MMX instructions with XD prefix.
+// MMXIS - MMX instructions with XS prefix.
+class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX,In64BitMode]>;
+class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, REX_W, Requires<[HasMMX]>;
+class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasMMX]>;
+class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
+class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
+
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
new file mode 100644
index 000000000000..2cd3733f0fb3
--- /dev/null
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -0,0 +1,3227 @@
+//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrInfo.h"
+#include "X86.h"
+#include "X86GenInstrInfo.inc"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetAsmInfo.h"
+
+using namespace llvm;
+
+namespace {
+ cl::opt<bool>
+ NoFusing("disable-spill-fusing",
+ cl::desc("Disable fusing of spill code into instructions"));
+ cl::opt<bool>
+ PrintFailedFusing("print-failed-fuse-candidates",
+ cl::desc("Print instructions that the allocator wants to"
+ " fuse, but the X86 backend currently can't"),
+ cl::Hidden);
+ cl::opt<bool>
+ ReMatPICStubLoad("remat-pic-stub-load",
+ cl::desc("Re-materialize load from stub in PIC mode"),
+ cl::init(false), cl::Hidden);
+}
+
+X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
+ : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
+ TM(tm), RI(tm, *this) {
+ SmallVector<unsigned,16> AmbEntries;
+ static const unsigned OpTbl2Addr[][2] = {
+ { X86::ADC32ri, X86::ADC32mi },
+ { X86::ADC32ri8, X86::ADC32mi8 },
+ { X86::ADC32rr, X86::ADC32mr },
+ { X86::ADC64ri32, X86::ADC64mi32 },
+ { X86::ADC64ri8, X86::ADC64mi8 },
+ { X86::ADC64rr, X86::ADC64mr },
+ { X86::ADD16ri, X86::ADD16mi },
+ { X86::ADD16ri8, X86::ADD16mi8 },
+ { X86::ADD16rr, X86::ADD16mr },
+ { X86::ADD32ri, X86::ADD32mi },
+ { X86::ADD32ri8, X86::ADD32mi8 },
+ { X86::ADD32rr, X86::ADD32mr },
+ { X86::ADD64ri32, X86::ADD64mi32 },
+ { X86::ADD64ri8, X86::ADD64mi8 },
+ { X86::ADD64rr, X86::ADD64mr },
+ { X86::ADD8ri, X86::ADD8mi },
+ { X86::ADD8rr, X86::ADD8mr },
+ { X86::AND16ri, X86::AND16mi },
+ { X86::AND16ri8, X86::AND16mi8 },
+ { X86::AND16rr, X86::AND16mr },
+ { X86::AND32ri, X86::AND32mi },
+ { X86::AND32ri8, X86::AND32mi8 },
+ { X86::AND32rr, X86::AND32mr },
+ { X86::AND64ri32, X86::AND64mi32 },
+ { X86::AND64ri8, X86::AND64mi8 },
+ { X86::AND64rr, X86::AND64mr },
+ { X86::AND8ri, X86::AND8mi },
+ { X86::AND8rr, X86::AND8mr },
+ { X86::DEC16r, X86::DEC16m },
+ { X86::DEC32r, X86::DEC32m },
+ { X86::DEC64_16r, X86::DEC64_16m },
+ { X86::DEC64_32r, X86::DEC64_32m },
+ { X86::DEC64r, X86::DEC64m },
+ { X86::DEC8r, X86::DEC8m },
+ { X86::INC16r, X86::INC16m },
+ { X86::INC32r, X86::INC32m },
+ { X86::INC64_16r, X86::INC64_16m },
+ { X86::INC64_32r, X86::INC64_32m },
+ { X86::INC64r, X86::INC64m },
+ { X86::INC8r, X86::INC8m },
+ { X86::NEG16r, X86::NEG16m },
+ { X86::NEG32r, X86::NEG32m },
+ { X86::NEG64r, X86::NEG64m },
+ { X86::NEG8r, X86::NEG8m },
+ { X86::NOT16r, X86::NOT16m },
+ { X86::NOT32r, X86::NOT32m },
+ { X86::NOT64r, X86::NOT64m },
+ { X86::NOT8r, X86::NOT8m },
+ { X86::OR16ri, X86::OR16mi },
+ { X86::OR16ri8, X86::OR16mi8 },
+ { X86::OR16rr, X86::OR16mr },
+ { X86::OR32ri, X86::OR32mi },
+ { X86::OR32ri8, X86::OR32mi8 },
+ { X86::OR32rr, X86::OR32mr },
+ { X86::OR64ri32, X86::OR64mi32 },
+ { X86::OR64ri8, X86::OR64mi8 },
+ { X86::OR64rr, X86::OR64mr },
+ { X86::OR8ri, X86::OR8mi },
+ { X86::OR8rr, X86::OR8mr },
+ { X86::ROL16r1, X86::ROL16m1 },
+ { X86::ROL16rCL, X86::ROL16mCL },
+ { X86::ROL16ri, X86::ROL16mi },
+ { X86::ROL32r1, X86::ROL32m1 },
+ { X86::ROL32rCL, X86::ROL32mCL },
+ { X86::ROL32ri, X86::ROL32mi },
+ { X86::ROL64r1, X86::ROL64m1 },
+ { X86::ROL64rCL, X86::ROL64mCL },
+ { X86::ROL64ri, X86::ROL64mi },
+ { X86::ROL8r1, X86::ROL8m1 },
+ { X86::ROL8rCL, X86::ROL8mCL },
+ { X86::ROL8ri, X86::ROL8mi },
+ { X86::ROR16r1, X86::ROR16m1 },
+ { X86::ROR16rCL, X86::ROR16mCL },
+ { X86::ROR16ri, X86::ROR16mi },
+ { X86::ROR32r1, X86::ROR32m1 },
+ { X86::ROR32rCL, X86::ROR32mCL },
+ { X86::ROR32ri, X86::ROR32mi },
+ { X86::ROR64r1, X86::ROR64m1 },
+ { X86::ROR64rCL, X86::ROR64mCL },
+ { X86::ROR64ri, X86::ROR64mi },
+ { X86::ROR8r1, X86::ROR8m1 },
+ { X86::ROR8rCL, X86::ROR8mCL },
+ { X86::ROR8ri, X86::ROR8mi },
+ { X86::SAR16r1, X86::SAR16m1 },
+ { X86::SAR16rCL, X86::SAR16mCL },
+ { X86::SAR16ri, X86::SAR16mi },
+ { X86::SAR32r1, X86::SAR32m1 },
+ { X86::SAR32rCL, X86::SAR32mCL },
+ { X86::SAR32ri, X86::SAR32mi },
+ { X86::SAR64r1, X86::SAR64m1 },
+ { X86::SAR64rCL, X86::SAR64mCL },
+ { X86::SAR64ri, X86::SAR64mi },
+ { X86::SAR8r1, X86::SAR8m1 },
+ { X86::SAR8rCL, X86::SAR8mCL },
+ { X86::SAR8ri, X86::SAR8mi },
+ { X86::SBB32ri, X86::SBB32mi },
+ { X86::SBB32ri8, X86::SBB32mi8 },
+ { X86::SBB32rr, X86::SBB32mr },
+ { X86::SBB64ri32, X86::SBB64mi32 },
+ { X86::SBB64ri8, X86::SBB64mi8 },
+ { X86::SBB64rr, X86::SBB64mr },
+ { X86::SHL16rCL, X86::SHL16mCL },
+ { X86::SHL16ri, X86::SHL16mi },
+ { X86::SHL32rCL, X86::SHL32mCL },
+ { X86::SHL32ri, X86::SHL32mi },
+ { X86::SHL64rCL, X86::SHL64mCL },
+ { X86::SHL64ri, X86::SHL64mi },
+ { X86::SHL8rCL, X86::SHL8mCL },
+ { X86::SHL8ri, X86::SHL8mi },
+ { X86::SHLD16rrCL, X86::SHLD16mrCL },
+ { X86::SHLD16rri8, X86::SHLD16mri8 },
+ { X86::SHLD32rrCL, X86::SHLD32mrCL },
+ { X86::SHLD32rri8, X86::SHLD32mri8 },
+ { X86::SHLD64rrCL, X86::SHLD64mrCL },
+ { X86::SHLD64rri8, X86::SHLD64mri8 },
+ { X86::SHR16r1, X86::SHR16m1 },
+ { X86::SHR16rCL, X86::SHR16mCL },
+ { X86::SHR16ri, X86::SHR16mi },
+ { X86::SHR32r1, X86::SHR32m1 },
+ { X86::SHR32rCL, X86::SHR32mCL },
+ { X86::SHR32ri, X86::SHR32mi },
+ { X86::SHR64r1, X86::SHR64m1 },
+ { X86::SHR64rCL, X86::SHR64mCL },
+ { X86::SHR64ri, X86::SHR64mi },
+ { X86::SHR8r1, X86::SHR8m1 },
+ { X86::SHR8rCL, X86::SHR8mCL },
+ { X86::SHR8ri, X86::SHR8mi },
+ { X86::SHRD16rrCL, X86::SHRD16mrCL },
+ { X86::SHRD16rri8, X86::SHRD16mri8 },
+ { X86::SHRD32rrCL, X86::SHRD32mrCL },
+ { X86::SHRD32rri8, X86::SHRD32mri8 },
+ { X86::SHRD64rrCL, X86::SHRD64mrCL },
+ { X86::SHRD64rri8, X86::SHRD64mri8 },
+ { X86::SUB16ri, X86::SUB16mi },
+ { X86::SUB16ri8, X86::SUB16mi8 },
+ { X86::SUB16rr, X86::SUB16mr },
+ { X86::SUB32ri, X86::SUB32mi },
+ { X86::SUB32ri8, X86::SUB32mi8 },
+ { X86::SUB32rr, X86::SUB32mr },
+ { X86::SUB64ri32, X86::SUB64mi32 },
+ { X86::SUB64ri8, X86::SUB64mi8 },
+ { X86::SUB64rr, X86::SUB64mr },
+ { X86::SUB8ri, X86::SUB8mi },
+ { X86::SUB8rr, X86::SUB8mr },
+ { X86::XOR16ri, X86::XOR16mi },
+ { X86::XOR16ri8, X86::XOR16mi8 },
+ { X86::XOR16rr, X86::XOR16mr },
+ { X86::XOR32ri, X86::XOR32mi },
+ { X86::XOR32ri8, X86::XOR32mi8 },
+ { X86::XOR32rr, X86::XOR32mr },
+ { X86::XOR64ri32, X86::XOR64mi32 },
+ { X86::XOR64ri8, X86::XOR64mi8 },
+ { X86::XOR64rr, X86::XOR64mr },
+ { X86::XOR8ri, X86::XOR8mi },
+ { X86::XOR8rr, X86::XOR8mr }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
+ unsigned RegOp = OpTbl2Addr[i][0];
+ unsigned MemOp = OpTbl2Addr[i][1];
+ if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
+ MemOp)).second)
+ assert(false && "Duplicated entries?");
+    unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); // Index 0, folded load and store
+ if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
+ std::make_pair(RegOp,
+ AuxInfo))).second)
+ AmbEntries.push_back(MemOp);
+ }
+
+  // The third value indicates whether the memory form folds a load (1) or a
+  // store (0).
+ static const unsigned OpTbl0[][3] = {
+ { X86::BT16ri8, X86::BT16mi8, 1 },
+ { X86::BT32ri8, X86::BT32mi8, 1 },
+ { X86::BT64ri8, X86::BT64mi8, 1 },
+ { X86::CALL32r, X86::CALL32m, 1 },
+ { X86::CALL64r, X86::CALL64m, 1 },
+ { X86::CMP16ri, X86::CMP16mi, 1 },
+ { X86::CMP16ri8, X86::CMP16mi8, 1 },
+ { X86::CMP16rr, X86::CMP16mr, 1 },
+ { X86::CMP32ri, X86::CMP32mi, 1 },
+ { X86::CMP32ri8, X86::CMP32mi8, 1 },
+ { X86::CMP32rr, X86::CMP32mr, 1 },
+ { X86::CMP64ri32, X86::CMP64mi32, 1 },
+ { X86::CMP64ri8, X86::CMP64mi8, 1 },
+ { X86::CMP64rr, X86::CMP64mr, 1 },
+ { X86::CMP8ri, X86::CMP8mi, 1 },
+ { X86::CMP8rr, X86::CMP8mr, 1 },
+ { X86::DIV16r, X86::DIV16m, 1 },
+ { X86::DIV32r, X86::DIV32m, 1 },
+ { X86::DIV64r, X86::DIV64m, 1 },
+ { X86::DIV8r, X86::DIV8m, 1 },
+ { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0 },
+ { X86::FsMOVAPDrr, X86::MOVSDmr, 0 },
+ { X86::FsMOVAPSrr, X86::MOVSSmr, 0 },
+ { X86::IDIV16r, X86::IDIV16m, 1 },
+ { X86::IDIV32r, X86::IDIV32m, 1 },
+ { X86::IDIV64r, X86::IDIV64m, 1 },
+ { X86::IDIV8r, X86::IDIV8m, 1 },
+ { X86::IMUL16r, X86::IMUL16m, 1 },
+ { X86::IMUL32r, X86::IMUL32m, 1 },
+ { X86::IMUL64r, X86::IMUL64m, 1 },
+ { X86::IMUL8r, X86::IMUL8m, 1 },
+ { X86::JMP32r, X86::JMP32m, 1 },
+ { X86::JMP64r, X86::JMP64m, 1 },
+ { X86::MOV16ri, X86::MOV16mi, 0 },
+ { X86::MOV16rr, X86::MOV16mr, 0 },
+ { X86::MOV32ri, X86::MOV32mi, 0 },
+ { X86::MOV32rr, X86::MOV32mr, 0 },
+ { X86::MOV64ri32, X86::MOV64mi32, 0 },
+ { X86::MOV64rr, X86::MOV64mr, 0 },
+ { X86::MOV8ri, X86::MOV8mi, 0 },
+ { X86::MOV8rr, X86::MOV8mr, 0 },
+ { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0 },
+ { X86::MOVAPDrr, X86::MOVAPDmr, 0 },
+ { X86::MOVAPSrr, X86::MOVAPSmr, 0 },
+ { X86::MOVDQArr, X86::MOVDQAmr, 0 },
+ { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0 },
+ { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0 },
+ { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0 },
+ { X86::MOVSDrr, X86::MOVSDmr, 0 },
+ { X86::MOVSDto64rr, X86::MOVSDto64mr, 0 },
+ { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0 },
+ { X86::MOVSSrr, X86::MOVSSmr, 0 },
+ { X86::MOVUPDrr, X86::MOVUPDmr, 0 },
+ { X86::MOVUPSrr, X86::MOVUPSmr, 0 },
+ { X86::MUL16r, X86::MUL16m, 1 },
+ { X86::MUL32r, X86::MUL32m, 1 },
+ { X86::MUL64r, X86::MUL64m, 1 },
+ { X86::MUL8r, X86::MUL8m, 1 },
+ { X86::SETAEr, X86::SETAEm, 0 },
+ { X86::SETAr, X86::SETAm, 0 },
+ { X86::SETBEr, X86::SETBEm, 0 },
+ { X86::SETBr, X86::SETBm, 0 },
+ { X86::SETEr, X86::SETEm, 0 },
+ { X86::SETGEr, X86::SETGEm, 0 },
+ { X86::SETGr, X86::SETGm, 0 },
+ { X86::SETLEr, X86::SETLEm, 0 },
+ { X86::SETLr, X86::SETLm, 0 },
+ { X86::SETNEr, X86::SETNEm, 0 },
+ { X86::SETNOr, X86::SETNOm, 0 },
+ { X86::SETNPr, X86::SETNPm, 0 },
+ { X86::SETNSr, X86::SETNSm, 0 },
+ { X86::SETOr, X86::SETOm, 0 },
+ { X86::SETPr, X86::SETPm, 0 },
+ { X86::SETSr, X86::SETSm, 0 },
+ { X86::TAILJMPr, X86::TAILJMPm, 1 },
+ { X86::TEST16ri, X86::TEST16mi, 1 },
+ { X86::TEST32ri, X86::TEST32mi, 1 },
+ { X86::TEST64ri32, X86::TEST64mi32, 1 },
+ { X86::TEST8ri, X86::TEST8mi, 1 }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
+ unsigned RegOp = OpTbl0[i][0];
+ unsigned MemOp = OpTbl0[i][1];
+ if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
+ MemOp)).second)
+ assert(false && "Duplicated entries?");
+ unsigned FoldedLoad = OpTbl0[i][2];
+ // Index 0, folded load or store.
+ unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
+ if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
+ if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
+ std::make_pair(RegOp, AuxInfo))).second)
+ AmbEntries.push_back(MemOp);
+ }
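+  // Illustrative: for { X86::MOV32rr, X86::MOV32mr, 0 } above, FoldedLoad is
+  // 0, so AuxInfo = 0 | (0 << 4) | (1 << 5): operand index 0, folded store;
+  // for { X86::CALL32r, X86::CALL32m, 1 } it is 0 | (1 << 4) | (0 << 5):
+  // operand index 0, folded load.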
+
+ static const unsigned OpTbl1[][2] = {
+ { X86::CMP16rr, X86::CMP16rm },
+ { X86::CMP32rr, X86::CMP32rm },
+ { X86::CMP64rr, X86::CMP64rm },
+ { X86::CMP8rr, X86::CMP8rm },
+ { X86::CVTSD2SSrr, X86::CVTSD2SSrm },
+ { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm },
+ { X86::CVTSI2SDrr, X86::CVTSI2SDrm },
+ { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm },
+ { X86::CVTSI2SSrr, X86::CVTSI2SSrm },
+ { X86::CVTSS2SDrr, X86::CVTSS2SDrm },
+ { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm },
+ { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm },
+ { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm },
+ { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm },
+ { X86::FsMOVAPDrr, X86::MOVSDrm },
+ { X86::FsMOVAPSrr, X86::MOVSSrm },
+ { X86::IMUL16rri, X86::IMUL16rmi },
+ { X86::IMUL16rri8, X86::IMUL16rmi8 },
+ { X86::IMUL32rri, X86::IMUL32rmi },
+ { X86::IMUL32rri8, X86::IMUL32rmi8 },
+ { X86::IMUL64rri32, X86::IMUL64rmi32 },
+ { X86::IMUL64rri8, X86::IMUL64rmi8 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm },
+ { X86::Int_COMISDrr, X86::Int_COMISDrm },
+ { X86::Int_COMISSrr, X86::Int_COMISSrm },
+ { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm },
+ { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm },
+ { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm },
+ { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm },
+ { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm },
+ { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm },
+ { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm },
+ { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm },
+ { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm },
+ { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm },
+ { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm },
+ { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm },
+ { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm },
+ { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm },
+ { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm },
+ { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm },
+ { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm },
+ { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm },
+ { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm },
+ { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm },
+ { X86::MOV16rr, X86::MOV16rm },
+ { X86::MOV32rr, X86::MOV32rm },
+ { X86::MOV64rr, X86::MOV64rm },
+ { X86::MOV64toPQIrr, X86::MOVQI2PQIrm },
+ { X86::MOV64toSDrr, X86::MOV64toSDrm },
+ { X86::MOV8rr, X86::MOV8rm },
+ { X86::MOVAPDrr, X86::MOVAPDrm },
+ { X86::MOVAPSrr, X86::MOVAPSrm },
+ { X86::MOVDDUPrr, X86::MOVDDUPrm },
+ { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm },
+ { X86::MOVDI2SSrr, X86::MOVDI2SSrm },
+ { X86::MOVDQArr, X86::MOVDQArm },
+ { X86::MOVSD2PDrr, X86::MOVSD2PDrm },
+ { X86::MOVSDrr, X86::MOVSDrm },
+ { X86::MOVSHDUPrr, X86::MOVSHDUPrm },
+ { X86::MOVSLDUPrr, X86::MOVSLDUPrm },
+ { X86::MOVSS2PSrr, X86::MOVSS2PSrm },
+ { X86::MOVSSrr, X86::MOVSSrm },
+ { X86::MOVSX16rr8, X86::MOVSX16rm8 },
+ { X86::MOVSX32rr16, X86::MOVSX32rm16 },
+ { X86::MOVSX32rr8, X86::MOVSX32rm8 },
+ { X86::MOVSX64rr16, X86::MOVSX64rm16 },
+ { X86::MOVSX64rr32, X86::MOVSX64rm32 },
+ { X86::MOVSX64rr8, X86::MOVSX64rm8 },
+ { X86::MOVUPDrr, X86::MOVUPDrm },
+ { X86::MOVUPSrr, X86::MOVUPSrm },
+ { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm },
+ { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm },
+ { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm },
+ { X86::MOVZX16rr8, X86::MOVZX16rm8 },
+ { X86::MOVZX32rr16, X86::MOVZX32rm16 },
+ { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 },
+ { X86::MOVZX32rr8, X86::MOVZX32rm8 },
+ { X86::MOVZX64rr16, X86::MOVZX64rm16 },
+ { X86::MOVZX64rr32, X86::MOVZX64rm32 },
+ { X86::MOVZX64rr8, X86::MOVZX64rm8 },
+ { X86::PSHUFDri, X86::PSHUFDmi },
+ { X86::PSHUFHWri, X86::PSHUFHWmi },
+ { X86::PSHUFLWri, X86::PSHUFLWmi },
+ { X86::RCPPSr, X86::RCPPSm },
+ { X86::RCPPSr_Int, X86::RCPPSm_Int },
+ { X86::RSQRTPSr, X86::RSQRTPSm },
+ { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int },
+ { X86::RSQRTSSr, X86::RSQRTSSm },
+ { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int },
+ { X86::SQRTPDr, X86::SQRTPDm },
+ { X86::SQRTPDr_Int, X86::SQRTPDm_Int },
+ { X86::SQRTPSr, X86::SQRTPSm },
+ { X86::SQRTPSr_Int, X86::SQRTPSm_Int },
+ { X86::SQRTSDr, X86::SQRTSDm },
+ { X86::SQRTSDr_Int, X86::SQRTSDm_Int },
+ { X86::SQRTSSr, X86::SQRTSSm },
+ { X86::SQRTSSr_Int, X86::SQRTSSm_Int },
+ { X86::TEST16rr, X86::TEST16rm },
+ { X86::TEST32rr, X86::TEST32rm },
+ { X86::TEST64rr, X86::TEST64rm },
+ { X86::TEST8rr, X86::TEST8rm },
+ // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
+ { X86::UCOMISDrr, X86::UCOMISDrm },
+ { X86::UCOMISSrr, X86::UCOMISSrm }
+ };
+
+ for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
+ unsigned RegOp = OpTbl1[i][0];
+ unsigned MemOp = OpTbl1[i][1];
+ if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
+ MemOp)).second)
+ assert(false && "Duplicated entries?");
+ unsigned AuxInfo = 1 | (1 << 4); // Index 1, folded load
+ if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
+ if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
+ std::make_pair(RegOp, AuxInfo))).second)
+ AmbEntries.push_back(MemOp);
+ }
+
+ static const unsigned OpTbl2[][2] = {
+ { X86::ADC32rr, X86::ADC32rm },
+ { X86::ADC64rr, X86::ADC64rm },
+ { X86::ADD16rr, X86::ADD16rm },
+ { X86::ADD32rr, X86::ADD32rm },
+ { X86::ADD64rr, X86::ADD64rm },
+ { X86::ADD8rr, X86::ADD8rm },
+ { X86::ADDPDrr, X86::ADDPDrm },
+ { X86::ADDPSrr, X86::ADDPSrm },
+ { X86::ADDSDrr, X86::ADDSDrm },
+ { X86::ADDSSrr, X86::ADDSSrm },
+ { X86::ADDSUBPDrr, X86::ADDSUBPDrm },
+ { X86::ADDSUBPSrr, X86::ADDSUBPSrm },
+ { X86::AND16rr, X86::AND16rm },
+ { X86::AND32rr, X86::AND32rm },
+ { X86::AND64rr, X86::AND64rm },
+ { X86::AND8rr, X86::AND8rm },
+ { X86::ANDNPDrr, X86::ANDNPDrm },
+ { X86::ANDNPSrr, X86::ANDNPSrm },
+ { X86::ANDPDrr, X86::ANDPDrm },
+ { X86::ANDPSrr, X86::ANDPSrm },
+ { X86::CMOVA16rr, X86::CMOVA16rm },
+ { X86::CMOVA32rr, X86::CMOVA32rm },
+ { X86::CMOVA64rr, X86::CMOVA64rm },
+ { X86::CMOVAE16rr, X86::CMOVAE16rm },
+ { X86::CMOVAE32rr, X86::CMOVAE32rm },
+ { X86::CMOVAE64rr, X86::CMOVAE64rm },
+ { X86::CMOVB16rr, X86::CMOVB16rm },
+ { X86::CMOVB32rr, X86::CMOVB32rm },
+ { X86::CMOVB64rr, X86::CMOVB64rm },
+ { X86::CMOVBE16rr, X86::CMOVBE16rm },
+ { X86::CMOVBE32rr, X86::CMOVBE32rm },
+ { X86::CMOVBE64rr, X86::CMOVBE64rm },
+ { X86::CMOVE16rr, X86::CMOVE16rm },
+ { X86::CMOVE32rr, X86::CMOVE32rm },
+ { X86::CMOVE64rr, X86::CMOVE64rm },
+ { X86::CMOVG16rr, X86::CMOVG16rm },
+ { X86::CMOVG32rr, X86::CMOVG32rm },
+ { X86::CMOVG64rr, X86::CMOVG64rm },
+ { X86::CMOVGE16rr, X86::CMOVGE16rm },
+ { X86::CMOVGE32rr, X86::CMOVGE32rm },
+ { X86::CMOVGE64rr, X86::CMOVGE64rm },
+ { X86::CMOVL16rr, X86::CMOVL16rm },
+ { X86::CMOVL32rr, X86::CMOVL32rm },
+ { X86::CMOVL64rr, X86::CMOVL64rm },
+ { X86::CMOVLE16rr, X86::CMOVLE16rm },
+ { X86::CMOVLE32rr, X86::CMOVLE32rm },
+ { X86::CMOVLE64rr, X86::CMOVLE64rm },
+ { X86::CMOVNE16rr, X86::CMOVNE16rm },
+ { X86::CMOVNE32rr, X86::CMOVNE32rm },
+ { X86::CMOVNE64rr, X86::CMOVNE64rm },
+ { X86::CMOVNO16rr, X86::CMOVNO16rm },
+ { X86::CMOVNO32rr, X86::CMOVNO32rm },
+ { X86::CMOVNO64rr, X86::CMOVNO64rm },
+ { X86::CMOVNP16rr, X86::CMOVNP16rm },
+ { X86::CMOVNP32rr, X86::CMOVNP32rm },
+ { X86::CMOVNP64rr, X86::CMOVNP64rm },
+ { X86::CMOVNS16rr, X86::CMOVNS16rm },
+ { X86::CMOVNS32rr, X86::CMOVNS32rm },
+ { X86::CMOVNS64rr, X86::CMOVNS64rm },
+ { X86::CMOVO16rr, X86::CMOVO16rm },
+ { X86::CMOVO32rr, X86::CMOVO32rm },
+ { X86::CMOVO64rr, X86::CMOVO64rm },
+ { X86::CMOVP16rr, X86::CMOVP16rm },
+ { X86::CMOVP32rr, X86::CMOVP32rm },
+ { X86::CMOVP64rr, X86::CMOVP64rm },
+ { X86::CMOVS16rr, X86::CMOVS16rm },
+ { X86::CMOVS32rr, X86::CMOVS32rm },
+ { X86::CMOVS64rr, X86::CMOVS64rm },
+ { X86::CMPPDrri, X86::CMPPDrmi },
+ { X86::CMPPSrri, X86::CMPPSrmi },
+ { X86::CMPSDrr, X86::CMPSDrm },
+ { X86::CMPSSrr, X86::CMPSSrm },
+ { X86::DIVPDrr, X86::DIVPDrm },
+ { X86::DIVPSrr, X86::DIVPSrm },
+ { X86::DIVSDrr, X86::DIVSDrm },
+ { X86::DIVSSrr, X86::DIVSSrm },
+ { X86::FsANDNPDrr, X86::FsANDNPDrm },
+ { X86::FsANDNPSrr, X86::FsANDNPSrm },
+ { X86::FsANDPDrr, X86::FsANDPDrm },
+ { X86::FsANDPSrr, X86::FsANDPSrm },
+ { X86::FsORPDrr, X86::FsORPDrm },
+ { X86::FsORPSrr, X86::FsORPSrm },
+ { X86::FsXORPDrr, X86::FsXORPDrm },
+ { X86::FsXORPSrr, X86::FsXORPSrm },
+ { X86::HADDPDrr, X86::HADDPDrm },
+ { X86::HADDPSrr, X86::HADDPSrm },
+ { X86::HSUBPDrr, X86::HSUBPDrm },
+ { X86::HSUBPSrr, X86::HSUBPSrm },
+ { X86::IMUL16rr, X86::IMUL16rm },
+ { X86::IMUL32rr, X86::IMUL32rm },
+ { X86::IMUL64rr, X86::IMUL64rm },
+ { X86::MAXPDrr, X86::MAXPDrm },
+ { X86::MAXPDrr_Int, X86::MAXPDrm_Int },
+ { X86::MAXPSrr, X86::MAXPSrm },
+ { X86::MAXPSrr_Int, X86::MAXPSrm_Int },
+ { X86::MAXSDrr, X86::MAXSDrm },
+ { X86::MAXSDrr_Int, X86::MAXSDrm_Int },
+ { X86::MAXSSrr, X86::MAXSSrm },
+ { X86::MAXSSrr_Int, X86::MAXSSrm_Int },
+ { X86::MINPDrr, X86::MINPDrm },
+ { X86::MINPDrr_Int, X86::MINPDrm_Int },
+ { X86::MINPSrr, X86::MINPSrm },
+ { X86::MINPSrr_Int, X86::MINPSrm_Int },
+ { X86::MINSDrr, X86::MINSDrm },
+ { X86::MINSDrr_Int, X86::MINSDrm_Int },
+ { X86::MINSSrr, X86::MINSSrm },
+ { X86::MINSSrr_Int, X86::MINSSrm_Int },
+ { X86::MULPDrr, X86::MULPDrm },
+ { X86::MULPSrr, X86::MULPSrm },
+ { X86::MULSDrr, X86::MULSDrm },
+ { X86::MULSSrr, X86::MULSSrm },
+ { X86::OR16rr, X86::OR16rm },
+ { X86::OR32rr, X86::OR32rm },
+ { X86::OR64rr, X86::OR64rm },
+ { X86::OR8rr, X86::OR8rm },
+ { X86::ORPDrr, X86::ORPDrm },
+ { X86::ORPSrr, X86::ORPSrm },
+ { X86::PACKSSDWrr, X86::PACKSSDWrm },
+ { X86::PACKSSWBrr, X86::PACKSSWBrm },
+ { X86::PACKUSWBrr, X86::PACKUSWBrm },
+ { X86::PADDBrr, X86::PADDBrm },
+ { X86::PADDDrr, X86::PADDDrm },
+ { X86::PADDQrr, X86::PADDQrm },
+ { X86::PADDSBrr, X86::PADDSBrm },
+ { X86::PADDSWrr, X86::PADDSWrm },
+ { X86::PADDWrr, X86::PADDWrm },
+ { X86::PANDNrr, X86::PANDNrm },
+ { X86::PANDrr, X86::PANDrm },
+ { X86::PAVGBrr, X86::PAVGBrm },
+ { X86::PAVGWrr, X86::PAVGWrm },
+ { X86::PCMPEQBrr, X86::PCMPEQBrm },
+ { X86::PCMPEQDrr, X86::PCMPEQDrm },
+ { X86::PCMPEQWrr, X86::PCMPEQWrm },
+ { X86::PCMPGTBrr, X86::PCMPGTBrm },
+ { X86::PCMPGTDrr, X86::PCMPGTDrm },
+ { X86::PCMPGTWrr, X86::PCMPGTWrm },
+ { X86::PINSRWrri, X86::PINSRWrmi },
+ { X86::PMADDWDrr, X86::PMADDWDrm },
+ { X86::PMAXSWrr, X86::PMAXSWrm },
+ { X86::PMAXUBrr, X86::PMAXUBrm },
+ { X86::PMINSWrr, X86::PMINSWrm },
+ { X86::PMINUBrr, X86::PMINUBrm },
+ { X86::PMULDQrr, X86::PMULDQrm },
+ { X86::PMULHUWrr, X86::PMULHUWrm },
+ { X86::PMULHWrr, X86::PMULHWrm },
+ { X86::PMULLDrr, X86::PMULLDrm },
+ { X86::PMULLDrr_int, X86::PMULLDrm_int },
+ { X86::PMULLWrr, X86::PMULLWrm },
+ { X86::PMULUDQrr, X86::PMULUDQrm },
+ { X86::PORrr, X86::PORrm },
+ { X86::PSADBWrr, X86::PSADBWrm },
+ { X86::PSLLDrr, X86::PSLLDrm },
+ { X86::PSLLQrr, X86::PSLLQrm },
+ { X86::PSLLWrr, X86::PSLLWrm },
+ { X86::PSRADrr, X86::PSRADrm },
+ { X86::PSRAWrr, X86::PSRAWrm },
+ { X86::PSRLDrr, X86::PSRLDrm },
+ { X86::PSRLQrr, X86::PSRLQrm },
+ { X86::PSRLWrr, X86::PSRLWrm },
+ { X86::PSUBBrr, X86::PSUBBrm },
+ { X86::PSUBDrr, X86::PSUBDrm },
+ { X86::PSUBSBrr, X86::PSUBSBrm },
+ { X86::PSUBSWrr, X86::PSUBSWrm },
+ { X86::PSUBWrr, X86::PSUBWrm },
+ { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm },
+ { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm },
+ { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm },
+ { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm },
+ { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm },
+ { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm },
+ { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm },
+ { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm },
+ { X86::PXORrr, X86::PXORrm },
+ { X86::SBB32rr, X86::SBB32rm },
+ { X86::SBB64rr, X86::SBB64rm },
+ { X86::SHUFPDrri, X86::SHUFPDrmi },
+ { X86::SHUFPSrri, X86::SHUFPSrmi },
+ { X86::SUB16rr, X86::SUB16rm },
+ { X86::SUB32rr, X86::SUB32rm },
+ { X86::SUB64rr, X86::SUB64rm },
+ { X86::SUB8rr, X86::SUB8rm },
+ { X86::SUBPDrr, X86::SUBPDrm },
+ { X86::SUBPSrr, X86::SUBPSrm },
+ { X86::SUBSDrr, X86::SUBSDrm },
+ { X86::SUBSSrr, X86::SUBSSrm },
+ // FIXME: TEST*rr -> swapped operand of TEST*mr.
+ { X86::UNPCKHPDrr, X86::UNPCKHPDrm },
+ { X86::UNPCKHPSrr, X86::UNPCKHPSrm },
+ { X86::UNPCKLPDrr, X86::UNPCKLPDrm },
+ { X86::UNPCKLPSrr, X86::UNPCKLPSrm },
+ { X86::XOR16rr, X86::XOR16rm },
+ { X86::XOR32rr, X86::XOR32rm },
+ { X86::XOR64rr, X86::XOR64rm },
+ { X86::XOR8rr, X86::XOR8rm },
+ { X86::XORPDrr, X86::XORPDrm },
+ { X86::XORPSrr, X86::XORPSrm }
+ };
+
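+  // Entries in OpTbl2 fold a load into operand 2 (the second source operand).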
+ for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
+ unsigned RegOp = OpTbl2[i][0];
+ unsigned MemOp = OpTbl2[i][1];
+ if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
+ MemOp)).second)
+ assert(false && "Duplicated entries?");
+ unsigned AuxInfo = 2 | (1 << 4); // Index 2, folded load
+ if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
+ std::make_pair(RegOp, AuxInfo))).second)
+ AmbEntries.push_back(MemOp);
+ }
+
+  // There should be no remaining ambiguous entries in the unfolding map.
+ assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
+}
+
+bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case X86::MOV8rr:
+ case X86::MOV8rr_NOREX:
+ case X86::MOV16rr:
+ case X86::MOV32rr:
+ case X86::MOV64rr:
+ case X86::MOVSSrr:
+ case X86::MOVSDrr:
+
+ // FP Stack register class copies
+ case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
+ case X86::MOV_Fp3264: case X86::MOV_Fp3280:
+ case X86::MOV_Fp6432: case X86::MOV_Fp8032:
+
+ case X86::FsMOVAPSrr:
+ case X86::FsMOVAPDrr:
+ case X86::MOVAPSrr:
+ case X86::MOVAPDrr:
+ case X86::MOVDQArr:
+ case X86::MOVSS2PSrr:
+ case X86::MOVSD2PDrr:
+ case X86::MOVPS2SSrr:
+ case X86::MOVPD2SDrr:
+ case X86::MMX_MOVQ64rr:
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() &&
+ MI.getOperand(1).isReg() &&
+ "invalid register-register move instruction");
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcSubIdx = MI.getOperand(1).getSubReg();
+ DstSubIdx = MI.getOperand(0).getSubReg();
+ return true;
+ }
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm:
+ if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(4).isImm() &&
+ MI->getOperand(2).getImm() == 1 &&
+ MI->getOperand(3).getReg() == 0 &&
+ MI->getOperand(4).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV8mr:
+ case X86::MOV16mr:
+ case X86::MOV32mr:
+ case X86::MOV64mr:
+ case X86::ST_FpP64m:
+ case X86::MOVSSmr:
+ case X86::MOVSDmr:
+ case X86::MOVAPSmr:
+ case X86::MOVAPDmr:
+ case X86::MOVDQAmr:
+ case X86::MMX_MOVD64mr:
+ case X86::MMX_MOVQ64mr:
+ case X86::MMX_MOVNTQmr:
+ if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
+ MI->getOperand(2).isReg() && MI->getOperand(3).isImm() &&
+ MI->getOperand(1).getImm() == 1 &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(X86AddrNumOperands).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+
+/// regIsPICBase - Return true if register is a PIC base (i.e., it is defined
+/// by X86::MOVPC32r).
+static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
+ bool isPICBase = false;
+ for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
+ E = MRI.def_end(); I != E; ++I) {
+ MachineInstr *DefMI = I.getOperand().getParent();
+ if (DefMI->getOpcode() != X86::MOVPC32r)
+ return false;
+ assert(!isPICBase && "More than one PIC base?");
+ isPICBase = true;
+ }
+ return isPICBase;
+}
+
+/// isGVStub - Return true if the GV requires an extra load to get the
+/// real address.
+static inline bool isGVStub(GlobalValue *GV, X86TargetMachine &TM) {
+ return TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false);
+}
+
+bool
+X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::MOV8rm:
+ case X86::MOV16rm:
+ case X86::MOV32rm:
+ case X86::MOV64rm:
+ case X86::LD_Fp64m:
+ case X86::MOVSSrm:
+ case X86::MOVSDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVAPDrm:
+ case X86::MOVDQArm:
+ case X86::MMX_MOVD64rm:
+ case X86::MMX_MOVQ64rm: {
+ // Loads from constant pools are trivially rematerializable.
+ if (MI->getOperand(1).isReg() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ (MI->getOperand(4).isCPI() ||
+ (MI->getOperand(4).isGlobal() &&
+ isGVStub(MI->getOperand(4).getGlobal(), TM)))) {
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0)
+ return true;
+ // Allow re-materialization of PIC load.
+ if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
+ return false;
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+      return regIsPICBase(BaseReg, MRI);
+ }
+ return false;
+ }
+
+ case X86::LEA32r:
+ case X86::LEA64r: {
+ if (MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ !MI->getOperand(4).isReg()) {
+ // lea fi#, lea GV, etc. are all rematerializable.
+ if (!MI->getOperand(1).isReg())
+ return true;
+ unsigned BaseReg = MI->getOperand(1).getReg();
+ if (BaseReg == 0)
+ return true;
+ // Allow re-materialization of lea PICBase + x.
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return regIsPICBase(BaseReg, MRI);
+ }
+ return false;
+ }
+ }
+
+ // All other instructions marked M_REMATERIALIZABLE are always trivially
+ // rematerializable.
+ return true;
+}
+
+/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction
+/// that would clobber the EFLAGS condition register. Note the result may be
+/// conservative. If it cannot definitely determine the safety after visiting
+/// two instructions, it assumes it's not safe.
+static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ // It's always safe to clobber EFLAGS at the end of a block.
+ if (I == MBB.end())
+ return true;
+
+  // To keep compile time bounded, if we are not able to determine the
+  // safety after visiting 2 instructions, we assume it's not safe.
+ for (unsigned i = 0; i < 2; ++i) {
+ bool SeenDef = false;
+ for (unsigned j = 0, e = I->getNumOperands(); j != e; ++j) {
+ MachineOperand &MO = I->getOperand(j);
+ if (!MO.isReg())
+ continue;
+ if (MO.getReg() == X86::EFLAGS) {
+ if (MO.isUse())
+ return false;
+ SeenDef = true;
+ }
+ }
+
+ if (SeenDef)
+ // This instruction defines EFLAGS, no need to look any further.
+ return true;
+ ++I;
+
+ // If we make it to the end of the block, it's safe to clobber EFLAGS.
+ if (I == MBB.end())
+ return true;
+ }
+
+ // Conservative answer.
+ return false;
+}
+
+void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg,
+ const MachineInstr *Orig) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ unsigned SubIdx = Orig->getOperand(0).isReg()
+ ? Orig->getOperand(0).getSubReg() : 0;
+ bool ChangeSubIdx = SubIdx != 0;
+ if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
+ DestReg = RI.getSubReg(DestReg, SubIdx);
+ SubIdx = 0;
+ }
+
+ // MOV32r0 etc. are implemented with xor which clobbers condition code.
+ // Re-materialize them as movri instructions to avoid side effects.
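+  // For example, if EFLAGS is live across this point, MOV32r0 (normally an
+  // "xor reg, reg") is re-materialized as "mov reg, 0" instead.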
+ bool Emitted = false;
+ switch (Orig->getOpcode()) {
+ default: break;
+ case X86::MOV8r0:
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0: {
+ if (!isSafeToClobberEFLAGS(MBB, I)) {
+ unsigned Opc = 0;
+ switch (Orig->getOpcode()) {
+ default: break;
+ case X86::MOV8r0: Opc = X86::MOV8ri; break;
+ case X86::MOV16r0: Opc = X86::MOV16ri; break;
+ case X86::MOV32r0: Opc = X86::MOV32ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri32; break;
+ }
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
+ Emitted = true;
+ }
+ break;
+ }
+ }
+
+ if (!Emitted) {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+ }
+
+ if (ChangeSubIdx) {
+ MachineInstr *NewMI = prior(I);
+ NewMI->getOperand(0).setSubReg(SubIdx);
+ }
+}
+
+/// isInvariantLoad - Return true if the specified instruction (which is marked
+/// mayLoad) is loading from a location whose value is invariant across the
+/// function. For example, loading a value from the constant pool or from
+/// the argument area of a function, if it does not change. This should
+/// only return true if *all* loads the instruction does are invariant (if it
+/// does multiple loads).
+bool X86InstrInfo::isInvariantLoad(const MachineInstr *MI) const {
+ // This code cares about loads from three cases: constant pool entries,
+ // invariant argument slots, and global stubs. In order to handle these cases
+ // for all of the myriad of X86 instructions, we just scan for a CP/FI/GV
+ // operand and base our analysis on it. This is safe because the address of
+ // none of these three cases is ever used as anything other than a load base
+ // and X86 doesn't have any instructions that load from multiple places.
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Loads from constant pools are trivially invariant.
+ if (MO.isCPI())
+ return true;
+
+ if (MO.isGlobal())
+ return isGVStub(MO.getGlobal(), TM);
+
+ // If this is a load from an invariant stack slot, the load is a constant.
+ if (MO.isFI()) {
+ const MachineFrameInfo &MFI =
+ *MI->getParent()->getParent()->getFrameInfo();
+ int Idx = MO.getIndex();
+ return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx);
+ }
+ }
+
+ // All other instances of these instructions are presumed to have other
+ // issues.
+ return false;
+}
+
+/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
+/// is not marked dead.
+static bool hasLiveCondCodeDef(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ MO.getReg() == X86::EFLAGS && !MO.isDead()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// convertToThreeAddress - This method must be implemented by targets that
+/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+/// may be able to convert a two-address instruction into a true
+/// three-address instruction on demand. This allows the X86 target (for
+/// example) to convert ADD and SHL instructions into LEA instructions if they
+/// would require register copies due to two-addressness.
+///
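+/// For example (illustrative):
+///   %R = ADD32rr %R, %S   -->   %R = LEA32r [%R + %S]
+///   %R = SHL64ri %R, 3    -->   %R = LEA64r [%R * 8]
+///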
+/// This method returns a null pointer if the transformation cannot be
+/// performed, otherwise it returns the new instruction.
+///
+MachineInstr *
+X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+  // All input instructions are two-addr instructions. Get the known operands.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
+
+ MachineInstr *NewMI = NULL;
+ // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
+ // we have better subtarget support, enable the 16-bit LEA generation here.
+ bool DisableLEA16 = true;
+
+ unsigned MIOpc = MI->getOpcode();
+ switch (MIOpc) {
+ case X86::SHUFPSrri: {
+ assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+
+ unsigned B = MI->getOperand(1).getReg();
+ unsigned C = MI->getOperand(2).getReg();
+ if (B != C) return 0;
+ unsigned A = MI->getOperand(0).getReg();
+ unsigned M = MI->getOperand(3).getImm();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
+ .addReg(A, RegState::Define | getDeadRegState(isDead))
+ .addReg(B, getKillRegState(isKill)).addImm(M);
+ break;
+ }
+ case X86::SHL64ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+    // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't yet
+    // use the flags produced by a shift, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0);
+ break;
+ }
+ case X86::SHL32ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+    // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't yet
+    // use the flags produced by a shift, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ?
+ X86::LEA64_32r : X86::LEA32r;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill)).addImm(0);
+ break;
+ }
+ case X86::SHL16ri: {
+ assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
+    // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't yet
+    // use the flags produced by a shift, so this is safe.
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 0 || ShAmt >= 4) return 0;
+
+ if (DisableLEA16) {
+ // If 16-bit LEA is disabled, use 32-bit LEA via subregisters.
+ MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+ unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::LEA64_32r : X86::LEA32r;
+ unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+
+      // Build and insert into an implicit UNDEF value. This is OK because
+      // we'll be shifting and then extracting the lower 16 bits.
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+ MachineInstr *InsMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
+ .addReg(leaInReg)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(X86::SUBREG_16BIT);
+
+ NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg)
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(leaInReg, RegState::Kill)
+ .addImm(0);
+
+ MachineInstr *ExtMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(leaOutReg, RegState::Kill)
+ .addImm(X86::SUBREG_16BIT);
+
+ if (LV) {
+ // Update live variables
+ LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+ LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, InsMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, ExtMI);
+ }
+ return ExtMI;
+ } else {
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0);
+ }
+ break;
+ }
+ default: {
+    // The following opcodes also set the condition code register(s). Only
+    // convert them to an equivalent LEA if the condition code register defs
+    // are dead!
+ if (hasLiveCondCodeDef(MI))
+ return 0;
+
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+ switch (MIOpc) {
+ default: return 0;
+ case X86::INC64r:
+ case X86::INC32r:
+ case X86::INC64_32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, 1);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ if (DisableLEA16) return 0;
+ assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, 1);
+ break;
+ case X86::DEC64r:
+ case X86::DEC32r:
+ case X86::DEC64_32r: {
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, -1);
+ break;
+ }
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ if (DisableLEA16) return 0;
+ assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, -1);
+ break;
+ case X86::ADD64rr:
+ case X86::ADD32rr: {
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
+ : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, Src2, isKill2);
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
+ break;
+ }
+ case X86::ADD16rr: {
+ if (DisableLEA16) return 0;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, Src2, isKill2);
+ if (LV && isKill2)
+ LV->replaceKillInstruction(Src2, MI, NewMI);
+ break;
+ }
+ case X86::ADD64ri32:
+ case X86::ADD64ri8:
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ if (MI->getOperand(2).isImm())
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ break;
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ if (MI->getOperand(2).isImm()) {
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ }
+ break;
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ if (DisableLEA16) return 0;
+ assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
+ if (MI->getOperand(2).isImm())
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
+ break;
+ case X86::SHL16ri:
+ if (DisableLEA16) return 0;
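+      // FALL THROUGH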
+ case X86::SHL32ri:
+ case X86::SHL64ri: {
+ assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() &&
+ "Unknown shl instruction!");
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) {
+ X86AddressMode AM;
+ AM.Scale = 1 << ShAmt;
+ AM.IndexReg = Src;
+ unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r
+ : (MIOpc == X86::SHL32ri
+ ? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r);
+ NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)), AM);
+ if (isKill)
+ NewMI->getOperand(3).setIsKill(true);
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ if (!NewMI) return 0;
+
+ if (LV) { // Update live variables
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, NewMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, NewMI);
+ }
+
+ MFI->insert(MBBI, NewMI); // Insert the new inst
+ return NewMI;
+}
+
+/// commuteInstruction - We have a few instructions that require special
+/// handling when they are commuted.
+///
+MachineInstr *
+X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ switch (MI->getOpcode()) {
+ case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
+ case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
+ case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
+ case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
+ case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
+ case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
+ unsigned Opc;
+ unsigned Size;
+ switch (MI->getOpcode()) {
+ default: assert(0 && "Unreachable!");
+ case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
+ case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
+ case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
+ case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
+ case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
+ case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
+ }
+ unsigned Amt = MI->getOperand(3).getImm();
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ MI->getOperand(3).setImm(Size-Amt);
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ }
+ case X86::CMOVB16rr:
+ case X86::CMOVB32rr:
+ case X86::CMOVB64rr:
+ case X86::CMOVAE16rr:
+ case X86::CMOVAE32rr:
+ case X86::CMOVAE64rr:
+ case X86::CMOVE16rr:
+ case X86::CMOVE32rr:
+ case X86::CMOVE64rr:
+ case X86::CMOVNE16rr:
+ case X86::CMOVNE32rr:
+ case X86::CMOVNE64rr:
+ case X86::CMOVBE16rr:
+ case X86::CMOVBE32rr:
+ case X86::CMOVBE64rr:
+ case X86::CMOVA16rr:
+ case X86::CMOVA32rr:
+ case X86::CMOVA64rr:
+ case X86::CMOVL16rr:
+ case X86::CMOVL32rr:
+ case X86::CMOVL64rr:
+ case X86::CMOVGE16rr:
+ case X86::CMOVGE32rr:
+ case X86::CMOVGE64rr:
+ case X86::CMOVLE16rr:
+ case X86::CMOVLE32rr:
+ case X86::CMOVLE64rr:
+ case X86::CMOVG16rr:
+ case X86::CMOVG32rr:
+ case X86::CMOVG64rr:
+ case X86::CMOVS16rr:
+ case X86::CMOVS32rr:
+ case X86::CMOVS64rr:
+ case X86::CMOVNS16rr:
+ case X86::CMOVNS32rr:
+ case X86::CMOVNS64rr:
+ case X86::CMOVP16rr:
+ case X86::CMOVP32rr:
+ case X86::CMOVP64rr:
+ case X86::CMOVNP16rr:
+ case X86::CMOVNP32rr:
+ case X86::CMOVNP64rr:
+ case X86::CMOVO16rr:
+ case X86::CMOVO32rr:
+ case X86::CMOVO64rr:
+ case X86::CMOVNO16rr:
+ case X86::CMOVNO32rr:
+ case X86::CMOVNO64rr: {
+ unsigned Opc = 0;
+ switch (MI->getOpcode()) {
+ default: break;
+ case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
+ case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
+ case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
+ case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
+ case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
+ case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
+ case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break;
+ case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break;
+ case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break;
+ case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
+ case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
+ case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
+ case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
+ case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
+ case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
+ case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break;
+ case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break;
+ case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break;
+ case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break;
+ case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break;
+ case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break;
+ case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
+ case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
+ case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
+ case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
+ case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
+ case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
+ case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break;
+ case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break;
+ case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break;
+ case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break;
+ case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break;
+ case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break;
+ case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
+ case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
+ case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
+ case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break;
+ case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break;
+ case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break;
+ case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
+ case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
+ case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
+ case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break;
+ case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break;
+ case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break;
+ case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
+ case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
+ case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
+ }
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ // Fallthrough intended.
+ }
+ default:
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ }
+}
+
+static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
+ switch (BrOpc) {
+ default: return X86::COND_INVALID;
+ case X86::JE: return X86::COND_E;
+ case X86::JNE: return X86::COND_NE;
+ case X86::JL: return X86::COND_L;
+ case X86::JLE: return X86::COND_LE;
+ case X86::JG: return X86::COND_G;
+ case X86::JGE: return X86::COND_GE;
+ case X86::JB: return X86::COND_B;
+ case X86::JBE: return X86::COND_BE;
+ case X86::JA: return X86::COND_A;
+ case X86::JAE: return X86::COND_AE;
+ case X86::JS: return X86::COND_S;
+ case X86::JNS: return X86::COND_NS;
+ case X86::JP: return X86::COND_P;
+ case X86::JNP: return X86::COND_NP;
+ case X86::JO: return X86::COND_O;
+ case X86::JNO: return X86::COND_NO;
+ }
+}
+
+unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case X86::COND_E: return X86::JE;
+ case X86::COND_NE: return X86::JNE;
+ case X86::COND_L: return X86::JL;
+ case X86::COND_LE: return X86::JLE;
+ case X86::COND_G: return X86::JG;
+ case X86::COND_GE: return X86::JGE;
+ case X86::COND_B: return X86::JB;
+ case X86::COND_BE: return X86::JBE;
+ case X86::COND_A: return X86::JA;
+ case X86::COND_AE: return X86::JAE;
+ case X86::COND_S: return X86::JS;
+ case X86::COND_NS: return X86::JNS;
+ case X86::COND_P: return X86::JP;
+ case X86::COND_NP: return X86::JNP;
+ case X86::COND_O: return X86::JO;
+ case X86::COND_NO: return X86::JNO;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified condition,
+/// e.g. turning COND_E to COND_NE.
+X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case X86::COND_E: return X86::COND_NE;
+ case X86::COND_NE: return X86::COND_E;
+ case X86::COND_L: return X86::COND_GE;
+ case X86::COND_LE: return X86::COND_G;
+ case X86::COND_G: return X86::COND_LE;
+ case X86::COND_GE: return X86::COND_L;
+ case X86::COND_B: return X86::COND_AE;
+ case X86::COND_BE: return X86::COND_A;
+ case X86::COND_A: return X86::COND_BE;
+ case X86::COND_AE: return X86::COND_B;
+ case X86::COND_S: return X86::COND_NS;
+ case X86::COND_NS: return X86::COND_S;
+ case X86::COND_P: return X86::COND_NP;
+ case X86::COND_NP: return X86::COND_P;
+ case X86::COND_O: return X86::COND_NO;
+ case X86::COND_NO: return X86::COND_O;
+ }
+}
+
+bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (TID.isBranch() && !TID.isBarrier())
+ return true;
+ if (!TID.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+// For purposes of branch analysis do not count FP_REG_KILL as a terminator.
+static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI,
+ const X86InstrInfo &TII) {
+ if (MI->getOpcode() == X86::FP_REG_KILL)
+ return false;
+ return TII.isUnpredicatedTerminator(MI);
+}
+
+bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ // Working from the bottom, when we see a non-terminator
+ // instruction, we're done.
+ if (!isBrAnalysisUnpredicatedTerminator(I, *this))
+ break;
+ // A terminator that isn't a branch can't easily be handled
+ // by this analysis.
+ if (!I->getDesc().isBranch())
+ return true;
+ // Handle unconditional branches.
+ if (I->getOpcode() == X86::JMP) {
+ if (!AllowModify) {
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (next(I) != MBB.end())
+ next(I)->eraseFromParent();
+ Cond.clear();
+ FBB = 0;
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ continue;
+ }
+      // TBB is used to indicate the unconditional destination.
+ TBB = I->getOperand(0).getMBB();
+ continue;
+ }
+ // Handle conditional branches.
+ X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
+ if (BranchCode == X86::COND_INVALID)
+ return true; // Can't handle indirect branch.
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ FBB = TBB;
+ TBB = I->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ continue;
+ }
+ // Handle subsequent conditional branches. Only handle the case
+ // where all conditional branches branch to the same destination
+ // and their condition opcodes fit one of the special
+ // multi-branch idioms.
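+    // These idioms typically come from floating-point equality comparisons,
+    // where the parity flag encodes an unordered result.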
+ assert(Cond.size() == 1);
+ assert(TBB);
+ // Only handle the case where all conditional branches branch to
+ // the same destination.
+ if (TBB != I->getOperand(0).getMBB())
+ return true;
+ X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
+ // If the conditions are the same, we can leave them alone.
+ if (OldBranchCode == BranchCode)
+ continue;
+ // If they differ, see if they fit one of the known patterns.
+ // Theoretically we could handle more patterns here, but
+ // we shouldn't expect to see them if instruction selection
+ // has done a reasonable job.
+ if ((OldBranchCode == X86::COND_NP &&
+ BranchCode == X86::COND_E) ||
+ (OldBranchCode == X86::COND_E &&
+ BranchCode == X86::COND_NP))
+ BranchCode = X86::COND_NP_OR_E;
+ else if ((OldBranchCode == X86::COND_P &&
+ BranchCode == X86::COND_NE) ||
+ (OldBranchCode == X86::COND_NE &&
+ BranchCode == X86::COND_P))
+ BranchCode = X86::COND_NE_OR_P;
+ else
+ return true;
+ // Update the MachineOperand.
+ Cond[0].setImm(BranchCode);
+ }
+
+ return false;
+}
+
+unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->getOpcode() != X86::JMP &&
+ GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+unsigned
+X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME this should probably have a DebugLoc operand
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "X86 branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
+ switch (CC) {
+ case X86::COND_NP_OR_E:
+ // Synthesize NP_OR_E with two branches.
+ BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB);
+ ++Count;
+ BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB);
+ ++Count;
+ break;
+ case X86::COND_NE_OR_P:
+ // Synthesize NE_OR_P with two branches.
+ BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB);
+ ++Count;
+ BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB);
+ ++Count;
+ break;
+ default: {
+ unsigned Opc = GetCondBranchFromCond(CC);
+ BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
+ ++Count;
+ }
+ }
+ if (FBB) {
+ // Two-way Conditional branch. Insert the second branch.
+ BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+/// isHReg - Test if the given register is a physical h register (AH, BH,
+/// CH, or DH).
+static bool isHReg(unsigned Reg) {
+ return X86::GR8_ABCD_HRegClass.contains(Reg);
+}
+
+bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  // Determine if DestRC and SrcRC have a common superclass.
+ const TargetRegisterClass *CommonRC = DestRC;
+ if (DestRC == SrcRC)
+ /* Source and destination have the same register class. */;
+ else if (CommonRC->hasSuperClass(SrcRC))
+ CommonRC = SrcRC;
+ else if (!DestRC->hasSubClass(SrcRC))
+ CommonRC = 0;
+
+ if (CommonRC) {
+ unsigned Opc;
+ if (CommonRC == &X86::GR64RegClass) {
+ Opc = X86::MOV64rr;
+ } else if (CommonRC == &X86::GR32RegClass) {
+ Opc = X86::MOV32rr;
+ } else if (CommonRC == &X86::GR16RegClass) {
+ Opc = X86::MOV16rr;
+ } else if (CommonRC == &X86::GR8RegClass) {
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if ((isHReg(DestReg) || isHReg(SrcReg)) &&
+ TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8rr_NOREX;
+ else
+ Opc = X86::MOV8rr;
+ } else if (CommonRC == &X86::GR64_ABCDRegClass) {
+ Opc = X86::MOV64rr;
+ } else if (CommonRC == &X86::GR32_ABCDRegClass) {
+ Opc = X86::MOV32rr;
+ } else if (CommonRC == &X86::GR16_ABCDRegClass) {
+ Opc = X86::MOV16rr;
+ } else if (CommonRC == &X86::GR8_ABCD_LRegClass) {
+ Opc = X86::MOV8rr;
+ } else if (CommonRC == &X86::GR8_ABCD_HRegClass) {
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8rr_NOREX;
+ else
+ Opc = X86::MOV8rr;
+ } else if (CommonRC == &X86::GR64_NOREXRegClass) {
+ Opc = X86::MOV64rr;
+ } else if (CommonRC == &X86::GR32_NOREXRegClass) {
+ Opc = X86::MOV32rr;
+ } else if (CommonRC == &X86::GR16_NOREXRegClass) {
+ Opc = X86::MOV16rr;
+ } else if (CommonRC == &X86::GR8_NOREXRegClass) {
+ Opc = X86::MOV8rr;
+ } else if (CommonRC == &X86::RFP32RegClass) {
+ Opc = X86::MOV_Fp3232;
+ } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) {
+ Opc = X86::MOV_Fp6464;
+ } else if (CommonRC == &X86::RFP80RegClass) {
+ Opc = X86::MOV_Fp8080;
+ } else if (CommonRC == &X86::FR32RegClass) {
+ Opc = X86::FsMOVAPSrr;
+ } else if (CommonRC == &X86::FR64RegClass) {
+ Opc = X86::FsMOVAPDrr;
+ } else if (CommonRC == &X86::VR128RegClass) {
+ Opc = X86::MOVAPSrr;
+ } else if (CommonRC == &X86::VR64RegClass) {
+ Opc = X86::MMX_MOVQ64rr;
+ } else {
+ return false;
+ }
+ BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg);
+ return true;
+ }
+
+ // Moving EFLAGS to / from another register requires a push and a pop.
+ if (SrcRC == &X86::CCRRegClass) {
+ if (SrcReg != X86::EFLAGS)
+ return false;
+ if (DestRC == &X86::GR64RegClass) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHFQ));
+ BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
+ return true;
+ } else if (DestRC == &X86::GR32RegClass) {
+ BuildMI(MBB, MI, DL, get(X86::PUSHFD));
+ BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
+ return true;
+ }
+ } else if (DestRC == &X86::CCRRegClass) {
+ if (DestReg != X86::EFLAGS)
+ return false;
+ if (SrcRC == &X86::GR64RegClass) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
+ BuildMI(MBB, MI, DL, get(X86::POPFQ));
+ return true;
+ } else if (SrcRC == &X86::GR32RegClass) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
+ BuildMI(MBB, MI, DL, get(X86::POPFD));
+ return true;
+ }
+ }
+
+ // Moving from ST(0) turns into FpGET_ST0_32 etc.
+ if (SrcRC == &X86::RSTRegClass) {
+ // Copying from ST(0)/ST(1).
+ if (SrcReg != X86::ST0 && SrcReg != X86::ST1)
+ // Can only copy from ST(0)/ST(1) right now
+ return false;
+ bool isST0 = SrcReg == X86::ST0;
+ unsigned Opc;
+ if (DestRC == &X86::RFP32RegClass)
+ Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32;
+ else if (DestRC == &X86::RFP64RegClass)
+ Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64;
+ else {
+ if (DestRC != &X86::RFP80RegClass)
+ return false;
+ Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80;
+ }
+ BuildMI(MBB, MI, DL, get(Opc), DestReg);
+ return true;
+ }
+
+ // Moving to ST(0) turns into FpSET_ST0_32 etc.
+ if (DestRC == &X86::RSTRegClass) {
+ // Copying to ST(0) / ST(1).
+ if (DestReg != X86::ST0 && DestReg != X86::ST1)
+ // Can only copy to TOS right now
+ return false;
+ bool isST0 = DestReg == X86::ST0;
+ unsigned Opc;
+ if (SrcRC == &X86::RFP32RegClass)
+ Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32;
+ else if (SrcRC == &X86::RFP64RegClass)
+ Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64;
+ else {
+ if (SrcRC != &X86::RFP80RegClass)
+ return false;
+ Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80;
+ }
+ BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg);
+ return true;
+ }
+
+ // Not yet supported!
+ return false;
+}
+
+static unsigned getStoreRegOpcode(unsigned SrcReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ TargetMachine &TM) {
+ unsigned Opc = 0;
+ if (RC == &X86::GR64RegClass) {
+ Opc = X86::MOV64mr;
+ } else if (RC == &X86::GR32RegClass) {
+ Opc = X86::MOV32mr;
+ } else if (RC == &X86::GR16RegClass) {
+ Opc = X86::MOV16mr;
+ } else if (RC == &X86::GR8RegClass) {
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if (isHReg(SrcReg) &&
+ TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8mr_NOREX;
+ else
+ Opc = X86::MOV8mr;
+ } else if (RC == &X86::GR64_ABCDRegClass) {
+ Opc = X86::MOV64mr;
+ } else if (RC == &X86::GR32_ABCDRegClass) {
+ Opc = X86::MOV32mr;
+ } else if (RC == &X86::GR16_ABCDRegClass) {
+ Opc = X86::MOV16mr;
+ } else if (RC == &X86::GR8_ABCD_LRegClass) {
+ Opc = X86::MOV8mr;
+ } else if (RC == &X86::GR8_ABCD_HRegClass) {
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8mr_NOREX;
+ else
+ Opc = X86::MOV8mr;
+ } else if (RC == &X86::GR64_NOREXRegClass) {
+ Opc = X86::MOV64mr;
+ } else if (RC == &X86::GR32_NOREXRegClass) {
+ Opc = X86::MOV32mr;
+ } else if (RC == &X86::GR16_NOREXRegClass) {
+ Opc = X86::MOV16mr;
+ } else if (RC == &X86::GR8_NOREXRegClass) {
+ Opc = X86::MOV8mr;
+ } else if (RC == &X86::RFP80RegClass) {
+ Opc = X86::ST_FpP80m; // pops
+ } else if (RC == &X86::RFP64RegClass) {
+ Opc = X86::ST_Fp64m;
+ } else if (RC == &X86::RFP32RegClass) {
+ Opc = X86::ST_Fp32m;
+ } else if (RC == &X86::FR32RegClass) {
+ Opc = X86::MOVSSmr;
+ } else if (RC == &X86::FR64RegClass) {
+ Opc = X86::MOVSDmr;
+ } else if (RC == &X86::VR128RegClass) {
+    // If the stack is aligned (or will be realigned) to 16 bytes, we can use
+    // aligned stores.
+ Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr;
+ } else if (RC == &X86::VR64RegClass) {
+ Opc = X86::MMX_MOVQ64mr;
+ } else {
+ assert(0 && "Unknown regclass");
+ abort();
+ }
+
+ return Opc;
+}
+
+void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC) const {
+ const MachineFunction &MF = *MBB.getParent();
+ bool isAligned = (RI.getStackAlignment() >= 16) ||
+ RI.needsStackRealignment(MF);
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+ .addReg(SrcReg, getKillRegState(isKill));
+}
+
+void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ bool isAligned = (RI.getStackAlignment() >= 16) ||
+ RI.needsStackRealignment(MF);
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ MIB.addReg(SrcReg, getKillRegState(isKill));
+ NewMIs.push_back(MIB);
+}
+
+static unsigned getLoadRegOpcode(unsigned DestReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM) {
+ unsigned Opc = 0;
+ if (RC == &X86::GR64RegClass) {
+ Opc = X86::MOV64rm;
+ } else if (RC == &X86::GR32RegClass) {
+ Opc = X86::MOV32rm;
+ } else if (RC == &X86::GR16RegClass) {
+ Opc = X86::MOV16rm;
+ } else if (RC == &X86::GR8RegClass) {
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if (isHReg(DestReg) &&
+ TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8rm_NOREX;
+ else
+ Opc = X86::MOV8rm;
+ } else if (RC == &X86::GR64_ABCDRegClass) {
+ Opc = X86::MOV64rm;
+ } else if (RC == &X86::GR32_ABCDRegClass) {
+ Opc = X86::MOV32rm;
+ } else if (RC == &X86::GR16_ABCDRegClass) {
+ Opc = X86::MOV16rm;
+ } else if (RC == &X86::GR8_ABCD_LRegClass) {
+ Opc = X86::MOV8rm;
+ } else if (RC == &X86::GR8_ABCD_HRegClass) {
+ if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8rm_NOREX;
+ else
+ Opc = X86::MOV8rm;
+ } else if (RC == &X86::GR64_NOREXRegClass) {
+ Opc = X86::MOV64rm;
+ } else if (RC == &X86::GR32_NOREXRegClass) {
+ Opc = X86::MOV32rm;
+ } else if (RC == &X86::GR16_NOREXRegClass) {
+ Opc = X86::MOV16rm;
+ } else if (RC == &X86::GR8_NOREXRegClass) {
+ Opc = X86::MOV8rm;
+ } else if (RC == &X86::RFP80RegClass) {
+ Opc = X86::LD_Fp80m;
+ } else if (RC == &X86::RFP64RegClass) {
+ Opc = X86::LD_Fp64m;
+ } else if (RC == &X86::RFP32RegClass) {
+ Opc = X86::LD_Fp32m;
+ } else if (RC == &X86::FR32RegClass) {
+ Opc = X86::MOVSSrm;
+ } else if (RC == &X86::FR64RegClass) {
+ Opc = X86::MOVSDrm;
+ } else if (RC == &X86::VR128RegClass) {
+    // If the stack is aligned (or will be realigned) to 16 bytes, we can use
+    // aligned loads.
+ Opc = isStackAligned ? X86::MOVAPSrm : X86::MOVUPSrm;
+ } else if (RC == &X86::VR64RegClass) {
+ Opc = X86::MMX_MOVQ64rm;
+ } else {
+ assert(0 && "Unknown regclass");
+ abort();
+ }
+
+ return Opc;
+}
+
+void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC) const{
+ const MachineFunction &MF = *MBB.getParent();
+ bool isAligned = (RI.getStackAlignment() >= 16) ||
+ RI.needsStackRealignment(MF);
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+}
+
+void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ bool isAligned = (RI.getStackAlignment() >= 16) ||
+ RI.needsStackRealignment(MF);
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+ for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+ MIB.addOperand(Addr[i]);
+ NewMIs.push_back(MIB);
+}
+
+bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+ unsigned SlotSize = is64Bit ? 8 : 4;
+
+ MachineFunction &MF = *MBB.getParent();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ X86FI->setCalleeSavedFrameSize(CSI.size() * SlotSize);
+
+ unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
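+  // Push the registers in reverse order; the forward-walking POP loop in
+  // restoreCalleeSavedRegisters then restores each one from the matching slot.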
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ BuildMI(MBB, MI, DL, get(Opc))
+ .addReg(Reg, RegState::Kill);
+ }
+ return true;
+}
+
+bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+ unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ BuildMI(MBB, MI, DL, get(Opc), Reg);
+ }
+ return true;
+}
+
+static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI,
+ const TargetInstrInfo &TII) {
+ // Create the base instruction with the memory operand as the first part.
+ MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
+ MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(NewMI);
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+
+ // Loop over the rest of the ri operands, converting them over.
+ unsigned NumOps = MI->getDesc().getNumOperands()-2;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MachineOperand &MO = MI->getOperand(i+2);
+ MIB.addOperand(MO);
+ }
+ for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ MIB.addOperand(MO);
+ }
+ return MIB;
+}
+
+static MachineInstr *FuseInst(MachineFunction &MF,
+ unsigned Opcode, unsigned OpNo,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
+ MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(NewMI);
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (i == OpNo) {
+ assert(MO.isReg() && "Expected to fold into reg operand!");
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+ } else {
+ MIB.addOperand(MO);
+ }
+ }
+ return MIB;
+}
+
+static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
+ const SmallVectorImpl<MachineOperand> &MOs,
+ MachineInstr *MI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
+
+ unsigned NumAddrOps = MOs.size();
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ if (NumAddrOps < 4) // FrameIndex only
+ addOffset(MIB, 0);
+ return MIB.addImm(0);
+}
+
+MachineInstr*
+X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI, unsigned i,
+ const SmallVectorImpl<MachineOperand> &MOs) const{
+ const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+ bool isTwoAddrFold = false;
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ bool isTwoAddr = NumOps > 1 &&
+ MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
+
+ MachineInstr *NewMI = NULL;
+  // Folding a memory location into the two-address part of a two-address
+  // instruction is different from folding it elsewhere. It requires
+  // replacing the *two* registers with the memory location.
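+  // For example, folding a stack slot into the tied operands of ADD32rr
+  // yields ADD32mr, with the memory location acting as both a source and the
+  // destination.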
+ if (isTwoAddr && NumOps >= 2 && i < 2 &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isReg() &&
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+ OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+ isTwoAddrFold = true;
+ } else if (i == 0) { // If operand 0
+ if (MI->getOpcode() == X86::MOV16r0)
+ NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV32r0)
+ NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV64r0)
+ NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
+ else if (MI->getOpcode() == X86::MOV8r0)
+ NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
+ if (NewMI)
+ return NewMI;
+
+ OpcodeTablePtr = &RegOp2MemOpTable0;
+ } else if (i == 1) {
+ OpcodeTablePtr = &RegOp2MemOpTable1;
+ } else if (i == 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2;
+ }
+
+ // If table selected...
+ if (OpcodeTablePtr) {
+ // Find the Opcode to fuse
+ DenseMap<unsigned*, unsigned>::iterator I =
+ OpcodeTablePtr->find((unsigned*)MI->getOpcode());
+ if (I != OpcodeTablePtr->end()) {
+ if (isTwoAddrFold)
+ NewMI = FuseTwoAddrInst(MF, I->second, MOs, MI, *this);
+ else
+ NewMI = FuseInst(MF, I->second, i, MOs, MI, *this);
+ return NewMI;
+ }
+ }
+
+ // No fusion
+ if (PrintFailedFusing)
+ cerr << "We failed to fuse operand " << i << " in " << *MI;
+ return NULL;
+}
+
+
+MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ // Check switch flag
+ if (NoFusing) return NULL;
+
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
+ // FIXME: Move alignment requirement into tables?
+ if (Alignment < 16) {
+ switch (MI->getOpcode()) {
+ default: break;
+    // Not always safe to fold movsd into these instructions since their load-
+    // folding variants expect the address to be 16-byte aligned.
+ case X86::FsANDNPDrr:
+ case X86::FsANDNPSrr:
+ case X86::FsANDPDrr:
+ case X86::FsANDPSrr:
+ case X86::FsORPDrr:
+ case X86::FsORPSrr:
+ case X86::FsXORPDrr:
+ case X86::FsXORPSrr:
+ return NULL;
+ }
+ }
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ unsigned NewOpc = 0;
+ switch (MI->getOpcode()) {
+ default: return NULL;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+ }
+ // Change to CMPXXri r, 0 first.
+ MI->setDesc(get(NewOpc));
+ MI->getOperand(1).ChangeToImmediate(0);
+ } else if (Ops.size() != 1)
+ return NULL;
+
+ SmallVector<MachineOperand,4> MOs;
+ MOs.push_back(MachineOperand::CreateFI(FrameIndex));
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
+}
+
+MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) const {
+ // Check switch flag
+ if (NoFusing) return NULL;
+
+ // Determine the alignment of the load.
+ unsigned Alignment = 0;
+ if (LoadMI->hasOneMemOperand())
+ Alignment = LoadMI->memoperands_begin()->getAlignment();
+
+ // FIXME: Move alignment requirement into tables?
+ if (Alignment < 16) {
+ switch (MI->getOpcode()) {
+ default: break;
+ // Not always safe to fold movsd into these instructions since their load
+ // folding variants expect the address to be 16-byte aligned.
+ case X86::FsANDNPDrr:
+ case X86::FsANDNPSrr:
+ case X86::FsANDPDrr:
+ case X86::FsANDPSrr:
+ case X86::FsORPDrr:
+ case X86::FsORPSrr:
+ case X86::FsXORPDrr:
+ case X86::FsXORPSrr:
+ return NULL;
+ }
+ }
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ unsigned NewOpc = 0;
+ switch (MI->getOpcode()) {
+ default: return NULL;
+ case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+ }
+ // Change to CMPXXri r, 0 first.
+ MI->setDesc(get(NewOpc));
+ MI->getOperand(1).ChangeToImmediate(0);
+ } else if (Ops.size() != 1)
+ return NULL;
+
+ SmallVector<MachineOperand,X86AddrNumOperands> MOs;
+ if (LoadMI->getOpcode() == X86::V_SET0 ||
+ LoadMI->getOpcode() == X86::V_SETALLONES) {
+ // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+ // Create a constant-pool entry and operands to load from it.
+
+ // x86-32 PIC requires a PIC base register for constant pools.
+ unsigned PICBase = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_ &&
+ !TM.getSubtarget<X86Subtarget>().is64Bit())
+ // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+ // This doesn't work for several reasons.
+ // 1. GlobalBaseReg may have been spilled.
+ // 2. It may not be live at MI.
+ return NULL;
+
+ // Create a v4i32 constant-pool entry.
+ MachineConstantPool &MCP = *MF.getConstantPool();
+ const VectorType *Ty = VectorType::get(Type::Int32Ty, 4);
+ Constant *C = LoadMI->getOpcode() == X86::V_SET0 ?
+ ConstantVector::getNullValue(Ty) :
+ ConstantVector::getAllOnesValue(Ty);
+ unsigned CPI = MCP.getConstantPoolIndex(C, 16);
+
+ // Create operands to load from the constant pool entry.
+ MOs.push_back(MachineOperand::CreateReg(PICBase, false));
+ MOs.push_back(MachineOperand::CreateImm(1));
+ MOs.push_back(MachineOperand::CreateReg(0, false));
+ MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
+ MOs.push_back(MachineOperand::CreateReg(0, false));
+ } else {
+ // Folding a normal load. Just copy the load's address operands.
+ unsigned NumOps = LoadMI->getDesc().getNumOperands();
+ for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
+ MOs.push_back(LoadMI->getOperand(i));
+ }
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
+}
+
+
+bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ // Check switch flag
+ if (NoFusing) return false;
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case X86::TEST8rr:
+ case X86::TEST16rr:
+ case X86::TEST32rr:
+ case X86::TEST64rr:
+ return true;
+ }
+ }
+
+ if (Ops.size() != 1)
+ return false;
+
+ unsigned OpNum = Ops[0];
+ unsigned Opc = MI->getOpcode();
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ bool isTwoAddr = NumOps > 1 &&
+ MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
+
+ // Folding a memory location into the two-address part of a two-address
+ // instruction is different than folding it other places. It requires
+ // replacing the *two* registers with the memory location.
+ const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+ if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2Addr;
+ } else if (OpNum == 0) { // If operand 0
+ switch (Opc) {
+ case X86::MOV16r0:
+ case X86::MOV32r0:
+ case X86::MOV64r0:
+ case X86::MOV8r0:
+ return true;
+ default: break;
+ }
+ OpcodeTablePtr = &RegOp2MemOpTable0;
+ } else if (OpNum == 1) {
+ OpcodeTablePtr = &RegOp2MemOpTable1;
+ } else if (OpNum == 2) {
+ OpcodeTablePtr = &RegOp2MemOpTable2;
+ }
+
+ if (OpcodeTablePtr) {
+ // Find the Opcode to fuse
+ DenseMap<unsigned*, unsigned>::iterator I =
+ OpcodeTablePtr->find((unsigned*)Opc);
+ if (I != OpcodeTablePtr->end())
+ return true;
+ }
+ return false;
+}
+
+bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
+ if (I == MemOp2RegOpTable.end())
+ return false;
+ DebugLoc dl = MI->getDebugLoc();
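+ // Each MemOp2RegOpTable entry pairs the register-form opcode with a packed
+ // field: the memory operand index lives in the low 4 bits, bit 4 records
+ // whether the folded form performs a load, and bit 5 whether it performs a
+ // store.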
+ unsigned Opc = I->second.first;
+ unsigned Index = I->second.second & 0xf;
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ if (UnfoldLoad && !FoldedLoad)
+ return false;
+ UnfoldLoad &= FoldedLoad;
+ if (UnfoldStore && !FoldedStore)
+ return false;
+ UnfoldStore &= FoldedStore;
+
+ const TargetInstrDesc &TID = get(Opc);
+ const TargetOperandInfo &TOI = TID.OpInfo[Index];
+ const TargetRegisterClass *RC = TOI.isLookupPtrRegClass()
+ ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass);
+ SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
+ SmallVector<MachineOperand,2> BeforeOps;
+ SmallVector<MachineOperand,2> AfterOps;
+ SmallVector<MachineOperand,4> ImpOps;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (i >= Index && i < Index + X86AddrNumOperands)
+ AddrOps.push_back(Op);
+ else if (Op.isReg() && Op.isImplicit())
+ ImpOps.push_back(Op);
+ else if (i < Index)
+ BeforeOps.push_back(Op);
+ else if (i > Index)
+ AfterOps.push_back(Op);
+ }
+
+ // Emit the load instruction.
+ if (UnfoldLoad) {
+ loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs);
+ if (UnfoldStore) {
+ // Address operands cannot be marked isKill.
+ for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
+ MachineOperand &MO = NewMIs[0]->getOperand(i);
+ if (MO.isReg())
+ MO.setIsKill(false);
+ }
+ }
+ }
+
+ // Emit the data processing instruction.
+ MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(DataMI);
+
+ if (FoldedStore)
+ MIB.addReg(Reg, RegState::Define);
+ for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
+ MIB.addOperand(BeforeOps[i]);
+ if (FoldedLoad)
+ MIB.addReg(Reg);
+ for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
+ MIB.addOperand(AfterOps[i]);
+ for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
+ MachineOperand &MO = ImpOps[i];
+ MIB.addReg(MO.getReg(),
+ getDefRegState(MO.isDef()) |
+ RegState::Implicit |
+ getKillRegState(MO.isKill()) |
+ getDeadRegState(MO.isDead()));
+ }
+ // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
+ unsigned NewOpc = 0;
+ switch (DataMI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32:
+ case X86::CMP32ri:
+ case X86::CMP16ri:
+ case X86::CMP8ri: {
+ MachineOperand &MO0 = DataMI->getOperand(0);
+ MachineOperand &MO1 = DataMI->getOperand(1);
+ if (MO1.getImm() == 0) {
+ switch (DataMI->getOpcode()) {
+ default: break;
+ case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
+ case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
+ case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
+ case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
+ }
+ DataMI->setDesc(get(NewOpc));
+ MO1.ChangeToRegister(MO0.getReg(), false);
+ }
+ }
+ }
+ NewMIs.push_back(DataMI);
+
+ // Emit the store instruction.
+ if (UnfoldStore) {
+ const TargetOperandInfo &DstTOI = TID.OpInfo[0];
+ const TargetRegisterClass *DstRC = DstTOI.isLookupPtrRegClass()
+ ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass);
+ storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs);
+ }
+
+ return true;
+}
+
+bool
+X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const {
+ if (!N->isMachineOpcode())
+ return false;
+
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
+ if (I == MemOp2RegOpTable.end())
+ return false;
+ unsigned Opc = I->second.first;
+ unsigned Index = I->second.second & 0xf;
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ const TargetInstrDesc &TID = get(Opc);
+ const TargetOperandInfo &TOI = TID.OpInfo[Index];
+ const TargetRegisterClass *RC = TOI.isLookupPtrRegClass()
+ ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass);
+ unsigned NumDefs = TID.NumDefs;
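+ // Note: SDNode operands do not include the instruction's defs, so the
+ // operand index taken from the table is adjusted by NumDefs below.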
+ std::vector<SDValue> AddrOps;
+ std::vector<SDValue> BeforeOps;
+ std::vector<SDValue> AfterOps;
+ DebugLoc dl = N->getDebugLoc();
+ unsigned NumOps = N->getNumOperands();
+ for (unsigned i = 0; i != NumOps-1; ++i) {
+ SDValue Op = N->getOperand(i);
+ if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands)
+ AddrOps.push_back(Op);
+ else if (i < Index-NumDefs)
+ BeforeOps.push_back(Op);
+ else if (i > Index-NumDefs)
+ AfterOps.push_back(Op);
+ }
+ SDValue Chain = N->getOperand(NumOps-1);
+ AddrOps.push_back(Chain);
+
+ // Emit the load instruction.
+ SDNode *Load = 0;
+ const MachineFunction &MF = DAG.getMachineFunction();
+ if (FoldedLoad) {
+ MVT VT = *RC->vt_begin();
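+ // Stack objects are treated as 16-byte aligned only when the stack
+ // alignment guarantees it or the function will realign its stack.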
+ bool isAligned = (RI.getStackAlignment() >= 16) ||
+ RI.needsStackRealignment(MF);
+ Load = DAG.getTargetNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
+ VT, MVT::Other, &AddrOps[0], AddrOps.size());
+ NewNodes.push_back(Load);
+ }
+
+ // Emit the data processing instruction.
+ std::vector<MVT> VTs;
+ const TargetRegisterClass *DstRC = 0;
+ if (TID.getNumDefs() > 0) {
+ const TargetOperandInfo &DstTOI = TID.OpInfo[0];
+ DstRC = DstTOI.isLookupPtrRegClass()
+ ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass);
+ VTs.push_back(*DstRC->vt_begin());
+ }
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getValueType(i);
+ if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs())
+ VTs.push_back(VT);
+ }
+ if (Load)
+ BeforeOps.push_back(SDValue(Load, 0));
+ std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
+ SDNode *NewNode = DAG.getTargetNode(Opc, dl, VTs, &BeforeOps[0],
+ BeforeOps.size());
+ NewNodes.push_back(NewNode);
+
+ // Emit the store instruction.
+ if (FoldedStore) {
+ AddrOps.pop_back();
+ AddrOps.push_back(SDValue(NewNode, 0));
+ AddrOps.push_back(Chain);
+ bool isAligned = (RI.getStackAlignment() >= 16) ||
+ RI.needsStackRealignment(MF);
+ SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(0, DstRC,
+ isAligned, TM),
+ dl, MVT::Other,
+ &AddrOps[0], AddrOps.size());
+ NewNodes.push_back(Store);
+ }
+
+ return true;
+}
+
+unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore) const {
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ MemOp2RegOpTable.find((unsigned*)Opc);
+ if (I == MemOp2RegOpTable.end())
+ return 0;
+ bool FoldedLoad = I->second.second & (1 << 4);
+ bool FoldedStore = I->second.second & (1 << 5);
+ if (UnfoldLoad && !FoldedLoad)
+ return 0;
+ if (UnfoldStore && !FoldedStore)
+ return 0;
+ return I->second.first;
+}
+
+bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case X86::TCRETURNri:
+ case X86::TCRETURNdi:
+ case X86::RET: // Return.
+ case X86::RETI:
+ case X86::TAILJMPd:
+ case X86::TAILJMPr:
+ case X86::TAILJMPm:
+ case X86::JMP: // Uncond branch.
+ case X86::JMP32r: // Indirect branch.
+ case X86::JMP64r: // Indirect branch (64-bit).
+ case X86::JMP32m: // Indirect branch through mem.
+ case X86::JMP64m: // Indirect branch through mem (64-bit).
+ return true;
+ default: return false;
+ }
+}
+
+bool X86InstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1 && "Invalid X86 branch condition!");
+ X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
+ if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
+ return true;
+ Cond[0].setImm(GetOppositeBranchCondition(CC));
+ return false;
+}
+
+bool X86InstrInfo::
+isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+ // FIXME: Return false for x87 stack register classes for now. We can't
+ // allow any loads of these registers before FpGet_ST0_80.
+ return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
+ RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
+}
+
+unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) {
+ switch (Desc->TSFlags & X86II::ImmMask) {
+ case X86II::Imm8: return 1;
+ case X86II::Imm16: return 2;
+ case X86II::Imm32: return 4;
+ case X86II::Imm64: return 8;
+ default: assert(0 && "Immediate size not set!");
+ return 0;
+ }
+}
+
+/// isX86_64ExtendedReg - Is the MachineOperand an x86-64 extended register?
+/// e.g. r8, xmm8, etc.
+bool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) {
+ if (!MO.isReg()) return false;
+ switch (MO.getReg()) {
+ default: break;
+ case X86::R8: case X86::R9: case X86::R10: case X86::R11:
+ case X86::R12: case X86::R13: case X86::R14: case X86::R15:
+ case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D:
+ case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D:
+ case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W:
+ case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W:
+ case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B:
+ case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
+ case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
+ case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ return true;
+ }
+ return false;
+}
+
+
+/// determineREX - Determine if the MachineInstr has to be encoded with an X86-64
+/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
+/// size, and 3) use of X86-64 extended registers.
+unsigned X86InstrInfo::determineREX(const MachineInstr &MI) {
+ unsigned REX = 0;
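+ // REX bit layout used below: bit 0 is REX.B (ModRM r/m, SIB base, or opcode
+ // register), bit 1 is REX.X (SIB index), bit 2 is REX.R (ModRM reg field),
+ // and bit 3 is REX.W (64-bit operand size).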
+ const TargetInstrDesc &Desc = MI.getDesc();
+
+ // Pseudo instructions do not need REX prefix byte.
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return 0;
+ if (Desc.TSFlags & X86II::REX_W)
+ REX |= 1 << 3;
+
+ unsigned NumOps = Desc.getNumOperands();
+ if (NumOps) {
+ bool isTwoAddr = NumOps > 1 &&
+ Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (isX86_64NonExtLowByteReg(Reg))
+ REX |= 0x40;
+ }
+ }
+
+ switch (Desc.TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= (1 << 0) | (1 << 2);
+ break;
+ case X86II::MRMSrcReg: {
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << 0;
+ }
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands);
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ default: {
+ if (isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 0;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (isX86_64ExtendedReg(MO))
+ REX |= 1 << 2;
+ }
+ break;
+ }
+ }
+ }
+ return REX;
+}
+
+/// sizePCRelativeBlockAddress - This method returns the size of a PC
+/// relative block address instruction
+///
+static unsigned sizePCRelativeBlockAddress() {
+ return 4;
+}
+
+/// sizeGlobalAddress - Give the size of the emission of this global address
+///
+static unsigned sizeGlobalAddress(bool dword) {
+ return dword ? 8 : 4;
+}
+
+/// sizeConstPoolAddress - Give the size of the emission of this constant
+/// pool address
+///
+static unsigned sizeConstPoolAddress(bool dword) {
+ return dword ? 8 : 4;
+}
+
+/// sizeExternalSymbolAddress - Give the size of the emission of this external
+/// symbol
+///
+static unsigned sizeExternalSymbolAddress(bool dword) {
+ return dword ? 8 : 4;
+}
+
+/// sizeJumpTableAddress - Give the size of the emission of this jump
+/// table address
+///
+static unsigned sizeJumpTableAddress(bool dword) {
+ return dword ? 8 : 4;
+}
+
+static unsigned sizeConstant(unsigned Size) {
+ return Size;
+}
+
+static unsigned sizeRegModRMByte() {
+ return 1;
+}
+
+static unsigned sizeSIBByte() {
+ return 1;
+}
+
+static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) {
+ unsigned FinalSize = 0;
+ // If this is a simple integer displacement that doesn't require a relocation.
+ if (!RelocOp) {
+ FinalSize += sizeConstant(4);
+ return FinalSize;
+ }
+
+ // Otherwise, this is something that requires a relocation.
+ if (RelocOp->isGlobal()) {
+ FinalSize += sizeGlobalAddress(false);
+ } else if (RelocOp->isCPI()) {
+ FinalSize += sizeConstPoolAddress(false);
+ } else if (RelocOp->isJTI()) {
+ FinalSize += sizeJumpTableAddress(false);
+ } else {
+ assert(0 && "Unknown value to relocate!");
+ }
+ return FinalSize;
+}
+
+static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op,
+ bool IsPIC, bool Is64BitMode) {
+ const MachineOperand &Op3 = MI.getOperand(Op+3);
+ int DispVal = 0;
+ const MachineOperand *DispForReloc = 0;
+ unsigned FinalSize = 0;
+
+ // Figure out what sort of displacement we have to handle here.
+ if (Op3.isGlobal()) {
+ DispForReloc = &Op3;
+ } else if (Op3.isCPI()) {
+ if (Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal = 1;
+ }
+ } else if (Op3.isJTI()) {
+ if (Is64BitMode || IsPIC) {
+ DispForReloc = &Op3;
+ } else {
+ DispVal = 1;
+ }
+ } else {
+ DispVal = 1;
+ }
+
+ const MachineOperand &Base = MI.getOperand(Op);
+ const MachineOperand &IndexReg = MI.getOperand(Op+2);
+
+ unsigned BaseReg = Base.getReg();
+
+ // Is a SIB byte needed?
+ if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
+ IndexReg.getReg() == 0 &&
+ (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) {
+ if (BaseReg == 0) { // Just a displacement?
+ // Emit special case [disp32] encoding
+ ++FinalSize;
+ FinalSize += getDisplacementFieldSize(DispForReloc);
+ } else {
+ unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg);
+ if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
+ // Emit simple indirect register encoding... [EAX] f.e.
+ ++FinalSize;
+ // Be pessimistic and assume it's a disp32, not a disp8
+ } else {
+ // Emit the most general non-SIB encoding: [REG+disp32]
+ ++FinalSize;
+ FinalSize += getDisplacementFieldSize(DispForReloc);
+ }
+ }
+
+ } else { // We need a SIB byte, so start by outputting the ModR/M byte first
+ assert(IndexReg.getReg() != X86::ESP &&
+ IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
+
+ bool ForceDisp32 = false;
+ if (BaseReg == 0 || DispForReloc) {
+ // Emit the normal disp32 encoding.
+ ++FinalSize;
+ ForceDisp32 = true;
+ } else {
+ ++FinalSize;
+ }
+
+ FinalSize += sizeSIBByte();
+
+ // Do we need to output a displacement?
+ if (DispVal != 0 || ForceDisp32) {
+ FinalSize += getDisplacementFieldSize(DispForReloc);
+ }
+ }
+ return FinalSize;
+}
+
+
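+// GetInstSizeWithDesc - Conservatively compute the number of bytes this
+// instruction will occupy when encoded, walking the same prefix, opcode,
+// ModR/M, displacement, and immediate cases as the machine code emitter.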
+static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
+ const TargetInstrDesc *Desc,
+ bool IsPIC, bool Is64BitMode) {
+
+ unsigned Opcode = Desc->Opcode;
+ unsigned FinalSize = 0;
+
+ // Emit the lock opcode prefix as needed.
+ if (Desc->TSFlags & X86II::LOCK) ++FinalSize;
+
+ // Emit segment override opcode prefix as needed.
+ switch (Desc->TSFlags & X86II::SegOvrMask) {
+ case X86II::FS:
+ case X86II::GS:
+ ++FinalSize;
+ break;
+ default: assert(0 && "Invalid segment!");
+ case 0: break; // No segment override!
+ }
+
+ // Emit the repeat opcode prefix as needed.
+ if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize;
+
+ // Emit the operand size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::OpSize) ++FinalSize;
+
+ // Emit the address size opcode prefix as needed.
+ if (Desc->TSFlags & X86II::AdSize) ++FinalSize;
+
+ bool Need0FPrefix = false;
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::TB: // Two-byte opcode prefix
+ case X86II::T8: // 0F 38
+ case X86II::TA: // 0F 3A
+ Need0FPrefix = true;
+ break;
+ case X86II::REP: break; // already handled.
+ case X86II::XS: // F3 0F
+ ++FinalSize;
+ Need0FPrefix = true;
+ break;
+ case X86II::XD: // F2 0F
+ ++FinalSize;
+ Need0FPrefix = true;
+ break;
+ case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
+ case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
+ ++FinalSize;
+ break; // Two-byte opcode prefix
+ default: assert(0 && "Invalid prefix!");
+ case 0: break; // No prefix!
+ }
+
+ if (Is64BitMode) {
+ // REX prefix
+ unsigned REX = X86InstrInfo::determineREX(MI);
+ if (REX)
+ ++FinalSize;
+ }
+
+ // 0x0F escape code must be emitted just before the opcode.
+ if (Need0FPrefix)
+ ++FinalSize;
+
+ switch (Desc->TSFlags & X86II::Op0Mask) {
+ case X86II::T8: // 0F 38
+ ++FinalSize;
+ break;
+ case X86II::TA: // 0F 3A
+ ++FinalSize;
+ break;
+ }
+
+ // If this is a two-address instruction, skip one of the register operands.
+ unsigned NumOps = Desc->getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
+ CurOp++;
+ else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
+ // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
+ --NumOps;
+
+ switch (Desc->TSFlags & X86II::FormMask) {
+ default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
+ case X86II::Pseudo:
+ // Remember the current PC offset, this is the PIC relocation
+ // base address.
+ switch (Opcode) {
+ default:
+ break;
+ case TargetInstrInfo::INLINEASM: {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const char *AsmStr = MI.getOperand(0).getSymbolName();
+ const TargetAsmInfo* AI = MF->getTarget().getTargetAsmInfo();
+ FinalSize += AI->getInlineAsmLength(AsmStr);
+ break;
+ }
+ case TargetInstrInfo::DBG_LABEL:
+ case TargetInstrInfo::EH_LABEL:
+ break;
+ case TargetInstrInfo::IMPLICIT_DEF:
+ case TargetInstrInfo::DECLARE:
+ case X86::DWARF_LOC:
+ case X86::FP_REG_KILL:
+ break;
+ case X86::MOVPC32r: {
+ // This emits the "call" portion of this pseudo instruction.
+ ++FinalSize;
+ FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+ break;
+ }
+ }
+ CurOp = NumOps;
+ break;
+ case X86II::RawFrm:
+ ++FinalSize;
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ if (MO.isMBB()) {
+ FinalSize += sizePCRelativeBlockAddress();
+ } else if (MO.isGlobal()) {
+ FinalSize += sizeGlobalAddress(false);
+ } else if (MO.isSymbol()) {
+ FinalSize += sizeExternalSymbolAddress(false);
+ } else if (MO.isImm()) {
+ FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+ } else {
+ assert(0 && "Unknown RawFrm operand!");
+ }
+ }
+ break;
+
+ case X86II::AddRegFrm:
+ ++FinalSize;
+ ++CurOp;
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO1.isImm())
+ FinalSize += sizeConstant(Size);
+ else {
+ bool dword = false;
+ if (Opcode == X86::MOV64ri)
+ dword = true;
+ if (MO1.isGlobal()) {
+ FinalSize += sizeGlobalAddress(dword);
+ } else if (MO1.isSymbol())
+ FinalSize += sizeExternalSymbolAddress(dword);
+ else if (MO1.isCPI())
+ FinalSize += sizeConstPoolAddress(dword);
+ else if (MO1.isJTI())
+ FinalSize += sizeJumpTableAddress(dword);
+ }
+ }
+ break;
+
+ case X86II::MRMDestReg: {
+ ++FinalSize;
+ FinalSize += sizeRegModRMByte();
+ CurOp += 2;
+ if (CurOp != NumOps) {
+ ++CurOp;
+ FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+ }
+ break;
+ }
+ case X86II::MRMDestMem: {
+ ++FinalSize;
+ FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
+ CurOp += X86AddrNumOperands + 1;
+ if (CurOp != NumOps) {
+ ++CurOp;
+ FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+ }
+ break;
+ }
+
+ case X86II::MRMSrcReg:
+ ++FinalSize;
+ FinalSize += sizeRegModRMByte();
+ CurOp += 2;
+ if (CurOp != NumOps) {
+ ++CurOp;
+ FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+ }
+ break;
+
+ case X86II::MRMSrcMem: {
+ int AddrOperands;
+ if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ AddrOperands = X86AddrNumOperands - 1; // No segment register
+ else
+ AddrOperands = X86AddrNumOperands;
+
+ ++FinalSize;
+ FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
+ CurOp += AddrOperands + 1;
+ if (CurOp != NumOps) {
+ ++CurOp;
+ FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
+ }
+ break;
+ }
+
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ ++FinalSize;
+ if (Desc->getOpcode() == X86::LFENCE ||
+ Desc->getOpcode() == X86::MFENCE) {
+ // Special handling of lfence and mfence;
+ FinalSize += sizeRegModRMByte();
+ } else if (Desc->getOpcode() == X86::MONITOR ||
+ Desc->getOpcode() == X86::MWAIT) {
+ // Special handling of monitor and mwait.
+ FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode.
+ } else {
+ ++CurOp;
+ FinalSize += sizeRegModRMByte();
+ }
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO1 = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO1.isImm())
+ FinalSize += sizeConstant(Size);
+ else {
+ bool dword = false;
+ if (Opcode == X86::MOV64ri32)
+ dword = true;
+ if (MO1.isGlobal()) {
+ FinalSize += sizeGlobalAddress(dword);
+ } else if (MO1.isSymbol())
+ FinalSize += sizeExternalSymbolAddress(dword);
+ else if (MO1.isCPI())
+ FinalSize += sizeConstPoolAddress(dword);
+ else if (MO1.isJTI())
+ FinalSize += sizeJumpTableAddress(dword);
+ }
+ }
+ break;
+
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+
+ ++FinalSize;
+ FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
+ CurOp += X86AddrNumOperands;
+
+ if (CurOp != NumOps) {
+ const MachineOperand &MO = MI.getOperand(CurOp++);
+ unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+ if (MO.isImm())
+ FinalSize += sizeConstant(Size);
+ else {
+ bool dword = false;
+ if (Opcode == X86::MOV64mi32)
+ dword = true;
+ if (MO.isGlobal()) {
+ FinalSize += sizeGlobalAddress(dword);
+ } else if (MO.isSymbol())
+ FinalSize += sizeExternalSymbolAddress(dword);
+ else if (MO.isCPI())
+ FinalSize += sizeConstPoolAddress(dword);
+ else if (MO.isJTI())
+ FinalSize += sizeJumpTableAddress(dword);
+ }
+ }
+ break;
+ }
+
+ case X86II::MRMInitReg:
+ ++FinalSize;
+ // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
+ FinalSize += sizeRegModRMByte();
+ ++CurOp;
+ break;
+ }
+
+ if (!Desc->isVariadic() && CurOp != NumOps) {
+ cerr << "Cannot determine size: ";
+ MI.dump();
+ cerr << '\n';
+ abort();
+ }
+
+
+ return FinalSize;
+}
+
+
+unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const TargetInstrDesc &Desc = MI->getDesc();
+ bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_);
+ bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
+ unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
+ if (Desc.getOpcode() == X86::MOVPC32r) {
+ Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode);
+ }
+ return Size;
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
+
+ X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MBBI != FirstMBB.end()) DL = MBBI->getDebugLoc();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ // Operand of MovePCtoStack is completely ignored by asm printer. It's
+ // only used in JIT code emission as displacement to pc.
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC)
+ .addImm(0);
+
+ // If we're using vanilla 'GOT' PIC style, we should use relative addressing
+ // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
+ if (TM.getRelocationModel() == Reloc::PIC_ &&
+ TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+ GlobalBaseReg =
+ RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
+ .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
+ } else {
+ GlobalBaseReg = PC;
+ }
+
+ X86FI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
new file mode 100644
index 000000000000..e09769ee912c
--- /dev/null
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -0,0 +1,461 @@
+//===- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRUCTIONINFO_H
+#define X86INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "X86.h"
+#include "X86RegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class X86RegisterInfo;
+ class X86TargetMachine;
+
+namespace X86 {
+ // X86 specific condition code. These correspond to X86_*_COND in
+ // X86InstrInfo.td. They must be kept in synch.
+ enum CondCode {
+ COND_A = 0,
+ COND_AE = 1,
+ COND_B = 2,
+ COND_BE = 3,
+ COND_E = 4,
+ COND_G = 5,
+ COND_GE = 6,
+ COND_L = 7,
+ COND_LE = 8,
+ COND_NE = 9,
+ COND_NO = 10,
+ COND_NP = 11,
+ COND_NS = 12,
+ COND_O = 13,
+ COND_P = 14,
+ COND_S = 15,
+
+ // Artificial condition codes. These are used by AnalyzeBranch
+ // to indicate a block terminated with two conditional branches to
+ // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE,
+ // which can't be represented on x86 with a single condition. These
+ // are never used in MachineInstrs.
+ COND_NE_OR_P,
+ COND_NP_OR_E,
+
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ unsigned GetCondBranchFromCond(CondCode CC);
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_E to COND_NE.
+ CondCode GetOppositeBranchCondition(X86::CondCode CC);
+
+}
+
+/// X86II - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace X86II {
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction types. These are the standard/most common forms for X86
+ // instructions.
+ //
+
+ // Pseudo - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// Raw - This form is for instructions that don't have any operands, so
+ /// they are just a fixed opcode value, like 'leave'.
+ RawFrm = 1,
+
+ /// AddRegFrm - This form is used for instructions like 'push r32' that have
+ /// their one register operand added to their opcode.
+ AddRegFrm = 2,
+
+ /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is a register.
+ ///
+ MRMDestReg = 3,
+
+ /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is memory.
+ ///
+ MRMDestMem = 4,
+
+ /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is a register.
+ ///
+ MRMSrcReg = 5,
+
+ /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is memory.
+ ///
+ MRMSrcMem = 6,
+
+ /// MRM[0-7][rm] - These forms are used to represent instructions that use
+ /// a Mod/RM byte, and use the middle field to hold extended opcode
+ /// information. In the intel manual these are represented as /0, /1, ...
+ ///
+
+ // First, instructions that operate on a register r/m operand...
+ MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3
+ MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7
+
+ // Next, instructions that operate on a memory r/m operand...
+ MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3
+ MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7
+
+ // MRMInitReg - This form is used for instructions whose source and
+ // destinations are the same register.
+ MRMInitReg = 32,
+
+ FormMask = 63,
+
+ //===------------------------------------------------------------------===//
+ // Actual flags...
+
+ // OpSize - Set if this instruction requires an operand size prefix (0x66),
+ // which most often indicates that the instruction operates on 16 bit data
+ // instead of 32 bit data.
+ OpSize = 1 << 6,
+
+ // AdSize - Set if this instruction requires an address size prefix (0x67),
+ // which most often indicates that the instruction uses 16 bit addressing
+ // instead of 32 bit addressing (or 32 bit addressing in 64 bit mode).
+ AdSize = 1 << 7,
+
+ //===------------------------------------------------------------------===//
+ // Op0Mask - There are several prefix bytes that are used to form two byte
+ // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
+ // used to obtain the setting of this field. If no bits in this field are
+ // set, there is no prefix byte for obtaining a multibyte opcode.
+ //
+ Op0Shift = 8,
+ Op0Mask = 0xF << Op0Shift,
+
+ // TB - TwoByte - Set if this instruction has a two byte opcode, which
+ // starts with a 0x0F byte before the real opcode.
+ TB = 1 << Op0Shift,
+
+ // REP - The 0xF3 prefix byte indicating repetition of the following
+ // instruction.
+ REP = 2 << Op0Shift,
+
+ // D8-DF - These escape opcodes are used by the floating point unit. These
+ // values must remain sequential.
+ D8 = 3 << Op0Shift, D9 = 4 << Op0Shift,
+ DA = 5 << Op0Shift, DB = 6 << Op0Shift,
+ DC = 7 << Op0Shift, DD = 8 << Op0Shift,
+ DE = 9 << Op0Shift, DF = 10 << Op0Shift,
+
+ // XS, XD - These prefix codes are for single and double precision scalar
+ // floating point operations performed in the SSE registers.
+ XD = 11 << Op0Shift, XS = 12 << Op0Shift,
+
+ // T8, TA - Prefix after the 0x0F prefix.
+ T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
+
+ //===------------------------------------------------------------------===//
+ // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
+ // They are used to specify GPRs and SSE registers, 64-bit operand size,
+ // etc. We only care about the REX.W and REX.R bits and only the former is
+ // statically determined.
+ //
+ REXShift = 12,
+ REX_W = 1 << REXShift,
+
+ //===------------------------------------------------------------------===//
+ // This three-bit field describes the size of an immediate operand. Zero is
+ // unused so that we can tell if we forgot to set a value.
+ ImmShift = 13,
+ ImmMask = 7 << ImmShift,
+ Imm8 = 1 << ImmShift,
+ Imm16 = 2 << ImmShift,
+ Imm32 = 3 << ImmShift,
+ Imm64 = 4 << ImmShift,
+
+ //===------------------------------------------------------------------===//
+ // FP Instruction Classification... Zero is non-fp instruction.
+
+ // FPTypeMask - Mask for all of the FP types...
+ FPTypeShift = 16,
+ FPTypeMask = 7 << FPTypeShift,
+
+ // NotFP - The default, set for instructions that do not use FP registers.
+ NotFP = 0 << FPTypeShift,
+
+ // ZeroArgFP - 0 arg FP instruction which implicitly pushes ST(0), f.e. fld0
+ ZeroArgFP = 1 << FPTypeShift,
+
+ // OneArgFP - 1 arg FP instructions which implicitly read ST(0), such as fst
+ OneArgFP = 2 << FPTypeShift,
+
+ // OneArgFPRW - 1 arg FP instruction which implicitly reads ST(0) and writes a
+ // result back to ST(0). For example, fcos, fsqrt, etc.
+ //
+ OneArgFPRW = 3 << FPTypeShift,
+
+ // TwoArgFP - 2 arg FP instructions which implicitly read ST(0), and an
+ // explicit argument, storing the result to either ST(0) or the implicit
+ // argument. For example: fadd, fsub, fmul, etc...
+ TwoArgFP = 4 << FPTypeShift,
+
+ // CompareFP - 2 arg FP instructions which implicitly read ST(0) and an
+ // explicit argument, but have no destination. Example: fucom, fucomi, ...
+ CompareFP = 5 << FPTypeShift,
+
+ // CondMovFP - "2 operand" floating point conditional move instructions.
+ CondMovFP = 6 << FPTypeShift,
+
+ // SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
+ SpecialFP = 7 << FPTypeShift,
+
+ // Lock prefix
+ LOCKShift = 19,
+ LOCK = 1 << LOCKShift,
+
+ // Segment override prefixes. Currently we just need the ability to address
+ // stuff in gs and fs segments.
+ SegOvrShift = 20,
+ SegOvrMask = 3 << SegOvrShift,
+ FS = 1 << SegOvrShift,
+ GS = 2 << SegOvrShift,
+
+ // Bits 22 -> 23 are unused
+ OpcodeShift = 24,
+ OpcodeMask = 0xFF << OpcodeShift
+ };
+}
+
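+// Number of MachineOperands that make up an X86 memory reference: base
+// register, scale immediate, index register, displacement, and segment.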
+const int X86AddrNumOperands = 5;
+
+inline static bool isScale(const MachineOperand &MO) {
+ return MO.isImm() &&
+ (MO.getImm() == 1 || MO.getImm() == 2 ||
+ MO.getImm() == 4 || MO.getImm() == 8);
+}
+
+inline static bool isLeaMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFI()) return true;
+ return Op+4 <= MI->getNumOperands() &&
+ MI->getOperand(Op ).isReg() && isScale(MI->getOperand(Op+1)) &&
+ MI->getOperand(Op+2).isReg() &&
+ (MI->getOperand(Op+3).isImm() ||
+ MI->getOperand(Op+3).isGlobal() ||
+ MI->getOperand(Op+3).isCPI() ||
+ MI->getOperand(Op+3).isJTI());
+}
+
+inline static bool isMem(const MachineInstr *MI, unsigned Op) {
+ if (MI->getOperand(Op).isFI()) return true;
+ return Op+5 <= MI->getNumOperands() &&
+ MI->getOperand(Op+4).isReg() &&
+ isLeaMem(MI, Op);
+}
+
+class X86InstrInfo : public TargetInstrInfoImpl {
+ X86TargetMachine &TM;
+ const X86RegisterInfo RI;
+
+ /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
+ /// RegOp2MemOpTable2 - Load / store folding opcode maps.
+ ///
+ DenseMap<unsigned*, unsigned> RegOp2MemOpTable2Addr;
+ DenseMap<unsigned*, unsigned> RegOp2MemOpTable0;
+ DenseMap<unsigned*, unsigned> RegOp2MemOpTable1;
+ DenseMap<unsigned*, unsigned> RegOp2MemOpTable2;
+
+ /// MemOp2RegOpTable - Load / store unfolding opcode map.
+ ///
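+ /// Each entry maps a memory-form opcode to its register form plus a packed
+ /// operand index and load / store flags (see unfoldMemoryOperand).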
+ DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
+
+public:
+ explicit X86InstrInfo(X86TargetMachine &tm);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const X86RegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+
+ bool isReallyTriviallyReMaterializable(const MachineInstr *MI) const;
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, const MachineInstr *Orig) const;
+
+ bool isInvariantLoad(const MachineInstr *MI) const;
+
+ /// convertToThreeAddress - This method must be implemented by targets that
+ /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+ /// may be able to convert a two-address instruction into a true
+ /// three-address instruction on demand. This allows the X86 target (for
+ /// example) to convert ADD and SHL instructions into LEA instructions if they
+ /// would require register copies due to two-addressness.
+ ///
+ /// This method returns a null pointer if the transformation cannot be
+ /// performed, otherwise it returns the new instruction.
+ ///
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
+ /// commuteInstruction - We have a few instructions that must be hacked on to
+ /// commute them.
+ ///
+ virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+
+ // Branch analysis.
+ virtual bool isUnpredicatedTerminator(const MachineInstr* MI) const;
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ /// foldMemoryOperand - If this target supports it, fold a load or store of
+ /// the specified stack slot into the specified machine instruction for the
+ /// specified operand(s). If this is possible, the target should perform the
+ /// folding and return the new instruction; otherwise it should return null.
+ /// If it folds the instruction, it is likely that the MachineInstr the
+ /// iterator references has been changed.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ /// foldMemoryOperand - Same as the previous version except it allows folding
+ /// of any load and store from / to any address, not just from a specific
+ /// stack slot.
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const;
+
+ /// canFoldMemoryOperand - Returns true if the specified load / store
+ /// folding is possible.
+ virtual bool canFoldMemoryOperand(const MachineInstr*,
+ const SmallVectorImpl<unsigned> &) const;
+
+ /// unfoldMemoryOperand - Separate a single instruction which folded a load or
+ /// a store or a load and a store into two or more instructions. If this is
+ /// possible, returns true as well as the new instructions by reference.
+ virtual bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+ SmallVectorImpl<SDNode*> &NewNodes) const;
+
+ /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would-be new
+ /// instruction after load / store are unfolded from an instruction of the
+ /// specified opcode. It returns zero if the specified unfolding is not
+ /// possible.
+ virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+ bool UnfoldLoad, bool UnfoldStore) const;
+
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ /// isSafeToMoveRegClassDefs - Return true if it's safe to move a machine
+ /// instruction that defines the specified register class.
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+
+ // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
+ // specified machine instruction.
+ //
+ unsigned char getBaseOpcodeFor(const TargetInstrDesc *TID) const {
+ return TID->TSFlags >> X86II::OpcodeShift;
+ }
+ unsigned char getBaseOpcodeFor(unsigned Opcode) const {
+ return getBaseOpcodeFor(&get(Opcode));
+ }
+
+ static bool isX86_64NonExtLowByteReg(unsigned reg) {
+ return (reg == X86::SPL || reg == X86::BPL ||
+ reg == X86::SIL || reg == X86::DIL);
+ }
+
+ static unsigned sizeOfImm(const TargetInstrDesc *Desc);
+ static bool isX86_64ExtendedReg(const MachineOperand &MO);
+ static unsigned determineREX(const MachineInstr &MI);
+
+ /// GetInstSize - Returns the size of the specified MachineInstr.
+ ///
+ virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+ /// global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+
+private:
+ MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ unsigned OpNum,
+ const SmallVectorImpl<MachineOperand> &MOs) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
new file mode 100644
index 000000000000..50ae41764151
--- /dev/null
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -0,0 +1,3961 @@
+//===- X86InstrInfo.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 instruction set, defining the instructions, and
+// properties of the instructions which are needed for code generation, machine
+// code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// X86 specific DAG Nodes.
+//
+
+def SDTIntShiftDOp: SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisInt<3>]>;
+
+def SDTX86CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDTX86Cmov : SDTypeProfile<1, 4,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
+
+// Unary and binary operator instructions that set EFLAGS as a side-effect.
+def SDTUnaryArithWithFlags : SDTypeProfile<1, 1,
+ [SDTCisInt<0>]>;
+def SDTBinaryArithWithFlags : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisInt<0>]>;
+def SDTX86BrCond : SDTypeProfile<0, 3,
+ [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86SetCC : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i8>,
+ SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
+
+def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
+ SDTCisVT<2, i8>]>;
+def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>;
+def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>;
+
+def SDT_X86CallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_X86CallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
+
+def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
+
+def SDTX86RdTsc : SDTypeProfile<0, 0, []>;
+
+def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
+
+def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
+
+def X86bsf : SDNode<"X86ISD::BSF", SDTIntUnaryOp>;
+def X86bsr : SDNode<"X86ISD::BSR", SDTIntUnaryOp>;
+def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
+def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
+
+def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
+
+def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
+
+def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
+def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
+ [SDNPHasChain]>;
+def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
+
+def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
+ SDNPMayLoad]>;
+def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
+ SDNPMayLoad]>;
+def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def X86callseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def X86callseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def X86call : SDNode<"X86ISD::CALL", SDT_X86Call,
+ [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+
+def X86tailcall: SDNode<"X86ISD::TAILCALL", SDT_X86Call,
+ [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+
+def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>;
+def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
+ SDNPMayLoad]>;
+
+def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc,
+ [SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>;
+
+def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
+def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
+
+def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def X86SegmentBaseAddress : SDNode<"X86ISD::SegmentBaseAddress",
+ SDT_X86SegmentBaseAddress, []>;
+
+def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
+ [SDNPHasChain]>;
+
+def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags>;
+def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
+def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>;
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>;
+def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
+def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
+
+def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
+
+//===----------------------------------------------------------------------===//
+// X86 Operand Definitions.
+//
+
+// *mem - Operand definitions for the funky X86 addressing mode operands.
+//
+class X86MemOperand<string printMethod> : Operand<iPTR> {
+ let PrintMethod = printMethod;
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+}
+
+def i8mem : X86MemOperand<"printi8mem">;
+def i16mem : X86MemOperand<"printi16mem">;
+def i32mem : X86MemOperand<"printi32mem">;
+def i64mem : X86MemOperand<"printi64mem">;
+def i128mem : X86MemOperand<"printi128mem">;
+def f32mem : X86MemOperand<"printf32mem">;
+def f64mem : X86MemOperand<"printf64mem">;
+def f80mem : X86MemOperand<"printf80mem">;
+def f128mem : X86MemOperand<"printf128mem">;
+
+// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
+// plain GR64, so that it doesn't potentially require a REX prefix.
+def i8mem_NOREX : Operand<i64> {
+ let PrintMethod = "printi8mem";
+ let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX, i32imm, i8imm);
+}
+
+def lea32mem : Operand<i32> {
+ let PrintMethod = "printlea32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
+}
+
+def SSECC : Operand<i8> {
+ let PrintMethod = "printSSECC";
+}
+
+def piclabel: Operand<i32> {
+ let PrintMethod = "printPICLabel";
+}
+
+// A couple of more-descriptive operand definitions.
+// 16-bits but only 8 bits are significant.
+def i16i8imm : Operand<i16>;
+// 32-bits but only 8 bits are significant.
+def i32i8imm : Operand<i32>;
+
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+
+//===----------------------------------------------------------------------===//
+// X86 Complex Pattern Definitions.
+//
+
+// Define X86 specific addressing mode.
+def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
+def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
+ [add, sub, mul, shl, or, frameindex], []>;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Predicate Definitions.
+def HasMMX : Predicate<"Subtarget->hasMMX()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
+def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
+def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
+def In64BitMode : Predicate<"Subtarget->is64Bit()">;
+def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
+def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
+def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def OptForSpeed : Predicate<"!OptForSize">;
+def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
+def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
+
+//===----------------------------------------------------------------------===//
+// X86 Instruction Format Definitions.
+//
+
+include "X86InstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments...
+//
+
+// X86-specific condition codes. These correspond to CondCode in
+// X86InstrInfo.h and must be kept in sync.
+def X86_COND_A : PatLeaf<(i8 0)>; // alt. COND_NBE
+def X86_COND_AE : PatLeaf<(i8 1)>; // alt. COND_NC
+def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C
+def X86_COND_BE : PatLeaf<(i8 3)>; // alt. COND_NA
+def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z
+def X86_COND_G : PatLeaf<(i8 5)>; // alt. COND_NLE
+def X86_COND_GE : PatLeaf<(i8 6)>; // alt. COND_NL
+def X86_COND_L : PatLeaf<(i8 7)>; // alt. COND_NGE
+def X86_COND_LE : PatLeaf<(i8 8)>; // alt. COND_NG
+def X86_COND_NE : PatLeaf<(i8 9)>; // alt. COND_NZ
+def X86_COND_NO : PatLeaf<(i8 10)>;
+def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO
+def X86_COND_NS : PatLeaf<(i8 12)>;
+def X86_COND_O : PatLeaf<(i8 13)>;
+def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
+def X86_COND_S : PatLeaf<(i8 15)>;
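+// For example, X86_COND_E (4) is the condition operand used by the JE and
+// CMOVE* definitions later in this file.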
+
+def i16immSExt8 : PatLeaf<(i16 imm), [{
+ // i16immSExt8 predicate - True if the 16-bit immediate fits in an 8-bit
+ // sign-extended field.
+ return (int16_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+}]>;
+
+def i32immSExt8 : PatLeaf<(i32 imm), [{
+ // i32immSExt8 predicate - True if the 32-bit immediate fits in an 8-bit
+ // sign-extended field.
+ return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+}]>;
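+// For example, an i32 immediate of -58 (0xFFFFFFC6) qualifies, since its low
+// byte 0xC6 sign-extends back to -58, whereas 200 does not: its low byte 0xC8
+// sign-extends to -56.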
+
+// Helper fragments for loads.
+// It's always safe to treat an anyext i16 load as an i32 load if the i16 is
+// known to be 32-bit aligned or better. Ditto for i8 to i16.
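+// The fragments below also reject address spaces above 255, so ordinary loads
+// never match the segment-relative accesses handled by the gsload/fsload
+// fragments, and they refuse to widen volatile or under-aligned extloads.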
+def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (const Value *Src = LD->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (const Value *Src = LD->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (const Value *Src = LD->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 4 && !LD->isVolatile();
+ return false;
+}]>;
+
+def nvloadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (const Value *Src = LD->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ if (LD->isVolatile())
+ return false;
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 4;
+ return false;
+}]>;
+
+def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ return PT->getAddressSpace() == 256;
+ return false;
+}]>;
+
+def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ return PT->getAddressSpace() == 257;
+ return false;
+}]>;
+
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr)), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ return true;
+}]>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr)), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ return true;
+}]>;
+
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr)), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ return true;
+}]>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr)), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ return true;
+}]>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr)), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ return true;
+}]>;
+
+def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
+def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
+def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+
+def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>;
+def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>;
+def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>;
+def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
+def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
+def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+
+def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>;
+def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>;
+def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>;
+def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>;
+def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
+def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+
+
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+// An 'srl' node with a single use.
+def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
+// A 'trunc' node with a single use.
+def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
+ return N->hasOneUse();
+}]>;
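+// The single-use restriction lets a pattern fold the node into a larger
+// instruction without having to recompute the intermediate value for other
+// users.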
+
+// 'shld' and 'shrd' instruction patterns. Note that even though these have
+// the srl and shl in their patterns, the C++ code must still check for them,
+// because predicates are tested before child nodes are explored.
+
+def shrd : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2),
+ (or (srl node:$src1, node:$amt1),
+ (shl node:$src2, node:$amt2)), [{
+ assert(N->getOpcode() == ISD::OR);
+ return N->getOperand(0).getOpcode() == ISD::SRL &&
+ N->getOperand(1).getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) &&
+ isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) &&
+ N->getOperand(0).getConstantOperandVal(1) ==
+ N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1);
+}]>;
+
+def shld : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2),
+ (or (shl node:$src1, node:$amt1),
+ (srl node:$src2, node:$amt2)), [{
+ assert(N->getOpcode() == ISD::OR);
+ return N->getOperand(0).getOpcode() == ISD::SHL &&
+ N->getOperand(1).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) &&
+ isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) &&
+ N->getOperand(0).getConstantOperandVal(1) ==
+ N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1);
+}]>;
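+// For illustration: on an i32 value, (or (srl x, (i8 8)), (shl y, (i8 24)))
+// satisfies the shrd predicate because the two constant shift amounts sum to
+// the 32-bit value width.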
+
+//===----------------------------------------------------------------------===//
+// Instruction list...
+//
+
+// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [ESP, EFLAGS], Uses = [ESP] in {
+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In32BitMode]>;
+def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In32BitMode]>;
+}
+
+// Nop
+let neverHasSideEffects = 1 in
+ def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
+
+// PIC base
+let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
+ def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins piclabel:$label),
+ "call\t$label\n\t"
+ "pop{l}\t$reg", []>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// Return instructions.
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, FPForm = SpecialFP, FPFormBits = SpecialFP.Value in {
+ def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ "ret",
+ [(X86retflag 0)]>;
+ def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ "ret\t$amt",
+ [(X86retflag imm:$amt)]>;
+}
+
+// All branches are RawFrm, Void, Branch, and Terminators
+let isBranch = 1, isTerminator = 1 in
+ class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> :
+ I<opcode, RawFrm, (outs), ins, asm, pattern>;
+
+let isBranch = 1, isBarrier = 1 in
+ def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>;
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
+ [(brind GR32:$dst)]>;
+ def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
+ [(brind (loadi32 addr:$dst))]>;
+}
+
+// Conditional branches
+let Uses = [EFLAGS] in {
+def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst",
+ [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB;
+def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst",
+ [(X86brcond bb:$dst, X86_COND_NE, EFLAGS)]>, TB;
+def JL : IBr<0x8C, (ins brtarget:$dst), "jl\t$dst",
+ [(X86brcond bb:$dst, X86_COND_L, EFLAGS)]>, TB;
+def JLE : IBr<0x8E, (ins brtarget:$dst), "jle\t$dst",
+ [(X86brcond bb:$dst, X86_COND_LE, EFLAGS)]>, TB;
+def JG : IBr<0x8F, (ins brtarget:$dst), "jg\t$dst",
+ [(X86brcond bb:$dst, X86_COND_G, EFLAGS)]>, TB;
+def JGE : IBr<0x8D, (ins brtarget:$dst), "jge\t$dst",
+ [(X86brcond bb:$dst, X86_COND_GE, EFLAGS)]>, TB;
+
+def JB : IBr<0x82, (ins brtarget:$dst), "jb\t$dst",
+ [(X86brcond bb:$dst, X86_COND_B, EFLAGS)]>, TB;
+def JBE : IBr<0x86, (ins brtarget:$dst), "jbe\t$dst",
+ [(X86brcond bb:$dst, X86_COND_BE, EFLAGS)]>, TB;
+def JA : IBr<0x87, (ins brtarget:$dst), "ja\t$dst",
+ [(X86brcond bb:$dst, X86_COND_A, EFLAGS)]>, TB;
+def JAE : IBr<0x83, (ins brtarget:$dst), "jae\t$dst",
+ [(X86brcond bb:$dst, X86_COND_AE, EFLAGS)]>, TB;
+
+def JS : IBr<0x88, (ins brtarget:$dst), "js\t$dst",
+ [(X86brcond bb:$dst, X86_COND_S, EFLAGS)]>, TB;
+def JNS : IBr<0x89, (ins brtarget:$dst), "jns\t$dst",
+ [(X86brcond bb:$dst, X86_COND_NS, EFLAGS)]>, TB;
+def JP : IBr<0x8A, (ins brtarget:$dst), "jp\t$dst",
+ [(X86brcond bb:$dst, X86_COND_P, EFLAGS)]>, TB;
+def JNP : IBr<0x8B, (ins brtarget:$dst), "jnp\t$dst",
+ [(X86brcond bb:$dst, X86_COND_NP, EFLAGS)]>, TB;
+def JO : IBr<0x80, (ins brtarget:$dst), "jo\t$dst",
+ [(X86brcond bb:$dst, X86_COND_O, EFLAGS)]>, TB;
+def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst",
+ [(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB;
+} // Uses = [EFLAGS]
+
+//===----------------------------------------------------------------------===//
+// Call Instructions...
+//
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. ESP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+ def CALLpcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i32imm:$dst,variable_ops),
+ "call\t${dst:call}", []>;
+ def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
+ "call\t{*}$dst", [(X86call GR32:$dst)]>;
+ def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
+ "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
+ }
+
+// Tail call stuff.
+
+def TAILCALL : I<0, Pseudo, (outs), (ins),
+ "#TAILCALL",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+def TCRETURNdi : I<0, Pseudo, (outs), (ins i32imm:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURN $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_ops),
+ "#TC_RETURN $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+ def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAILCALL",
+ []>;
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+ def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst # TAILCALL",
+ []>;
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+ def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst),
+ "jmp\t{*}$dst # TAILCALL", []>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions...
+//
+let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
+def LEAVE : I<0xC9, RawFrm,
+ (outs), (ins), "leave", []>;
+
+let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
+let mayLoad = 1 in
+def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>;
+
+let mayStore = 1 in
+def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
+}
+
+let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in
+def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf", []>;
+let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHFD : I<0x9C, RawFrm, (outs), (ins), "pushf", []>;
+
+let isTwoAddress = 1 in // GR32 = bswap GR32
+ def BSWAP32r : I<0xC8, AddRegFrm,
+ (outs GR32:$dst), (ins GR32:$src),
+ "bswap{l}\t$dst",
+ [(set GR32:$dst, (bswap GR32:$src))]>, TB;
+
+
+// Bit scan instructions.
+let Defs = [EFLAGS] in {
+def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (X86bsf GR16:$src)), (implicit EFLAGS)]>, TB;
+def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (X86bsf (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, TB;
+def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86bsf GR32:$src)), (implicit EFLAGS)]>, TB;
+def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86bsf (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, TB;
+
+def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (X86bsr GR16:$src)), (implicit EFLAGS)]>, TB;
+def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (X86bsr (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, TB;
+def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86bsr GR32:$src)), (implicit EFLAGS)]>, TB;
+def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (X86bsr (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, TB;
+} // Defs = [EFLAGS]
+
+let neverHasSideEffects = 1 in
+def LEA16r : I<0x8D, MRMSrcMem,
+ (outs GR16:$dst), (ins i32mem:$src),
+ "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
+let isReMaterializable = 1 in
+def LEA32r : I<0x8D, MRMSrcMem,
+ (outs GR32:$dst), (ins lea32mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}",
+ [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
+
+let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI] in {
+def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)]>, REP;
+def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)]>, REP, OpSize;
+def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)]>, REP;
+}
+
+let Defs = [ECX,EDI], Uses = [AL,ECX,EDI] in
+def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)]>, REP;
+let Defs = [ECX,EDI], Uses = [AX,ECX,EDI] in
+def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)]>, REP, OpSize;
+let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI] in
+def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)]>, REP;
+
+let Defs = [RAX, RDX] in
+def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
+ TB;
+
+let isBarrier = 1, hasCtrlDep = 1 in {
+def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
+}
+
+//===----------------------------------------------------------------------===//
+// Input/Output Instructions...
+//
+let Defs = [AL], Uses = [DX] in
+def IN8rr : I<0xEC, RawFrm, (outs), (ins),
+ "in{b}\t{%dx, %al|%AL, %DX}", []>;
+let Defs = [AX], Uses = [DX] in
+def IN16rr : I<0xED, RawFrm, (outs), (ins),
+ "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize;
+let Defs = [EAX], Uses = [DX] in
+def IN32rr : I<0xED, RawFrm, (outs), (ins),
+ "in{l}\t{%dx, %eax|%EAX, %DX}", []>;
+
+let Defs = [AL] in
+def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i16i8imm:$port),
+ "in{b}\t{$port, %al|%AL, $port}", []>;
+let Defs = [AX] in
+def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
+ "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize;
+let Defs = [EAX] in
+def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port),
+ "in{l}\t{$port, %eax|%EAX, $port}", []>;
+
+let Uses = [DX, AL] in
+def OUT8rr : I<0xEE, RawFrm, (outs), (ins),
+ "out{b}\t{%al, %dx|%DX, %AL}", []>;
+let Uses = [DX, AX] in
+def OUT16rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize;
+let Uses = [DX, EAX] in
+def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
+ "out{l}\t{%eax, %dx|%DX, %EAX}", []>;
+
+let Uses = [AL] in
+def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i16i8imm:$port),
+ "out{b}\t{%al, $port|$port, %AL}", []>;
+let Uses = [AX] in
+def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
+ "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize;
+let Uses = [EAX] in
+def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port),
+ "out{l}\t{%eax, $port|$port, %EAX}", []>;
+
+//===----------------------------------------------------------------------===//
+// Move Instructions...
+//
+let neverHasSideEffects = 1 in {
+def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>;
+def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, imm:$src)]>;
+def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, imm:$src)]>, OpSize;
+def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, imm:$src)]>;
+}
+def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store (i8 imm:$src), addr:$dst)]>;
+def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store (i16 imm:$src), addr:$dst)]>, OpSize;
+def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store (i32 imm:$src), addr:$dst)]>;
+
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, (loadi8 addr:$src))]>;
+def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (loadi16 addr:$src))]>, OpSize;
+def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (loadi32 addr:$src))]>;
+}
+
+def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store GR8:$src, addr:$dst)]>;
+def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store GR16:$src, addr:$dst)]>, OpSize;
+def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store GR32:$src, addr:$dst)]>;
+
+// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
+// that they can be used for copying and storing h registers, which can't be
+// encoded when a REX prefix is present.
+let neverHasSideEffects = 1 in
+def MOV8rr_NOREX : I<0x88, MRMDestReg,
+ (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+let mayStore = 1 in
+def MOV8mr_NOREX : I<0x88, MRMDestMem,
+ (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+let mayLoad = 1,
+ canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
+ (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+
+//===----------------------------------------------------------------------===//
+// Fixed-Register Multiplication and Division Instructions...
+//
+
+// Extra precision multiplication
+let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, GR8:$src)),
+ (implicit EFLAGS)]>; // AL,AH = AL*GR8
+
+let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
+def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*GR16
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
+def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
+ "mul{l}\t$src",
+ []>; // EAX,EDX = EAX*GR32
+
+let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
+ "mul{b}\t$src",
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, (loadi8 addr:$src))),
+ (implicit EFLAGS)]>; // AL,AH = AL*[mem8]
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*[mem16]
+
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
+ "mul{l}\t$src",
+ []>; // EAX,EDX = EAX*[mem32]
+}
+
+let neverHasSideEffects = 1 in {
+let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
+ // AL,AH = AL*GR8
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
+ OpSize; // AX,DX = AX*GR16
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
+ // EAX,EDX = EAX*GR32
+let mayLoad = 1 in {
+let Defs = [AL,AH,EFLAGS], Uses = [AL] in
+def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
+ "imul{b}\t$src", []>; // AL,AH = AL*[mem8]
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
+ "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
+ "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
+}
+} // neverHasSideEffects
+
+// Unsigned division/remainder.
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "div{l}\t$src", []>;
+let mayLoad = 1 in {
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), // EDX:EAX/[mem32] = EAX,EDX
+ "div{l}\t$src", []>;
+}
+
+// Signed division/remainder.
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
+ "idiv{l}\t$src", []>;
+let mayLoad = 1 in {
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
+ "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+ "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), // EDX:EAX/[mem32] = EAX,EDX
+ "idiv{l}\t$src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Two address Instructions.
+//
+let isTwoAddress = 1 in {
+
+// Conditional moves
+let Uses = [EFLAGS] in {
+let isCommutable = 1 in {
+def CMOVB16rr : I<0x42, MRMSrcReg, // if <u, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_B, EFLAGS))]>,
+ TB, OpSize;
+def CMOVB32rr : I<0x42, MRMSrcReg, // if <u, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_B, EFLAGS))]>,
+ TB;
+def CMOVAE16rr: I<0x43, MRMSrcReg, // if >=u, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_AE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVAE32rr: I<0x43, MRMSrcReg, // if >=u, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_AE, EFLAGS))]>,
+ TB;
+def CMOVE16rr : I<0x44, MRMSrcReg, // if ==, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_E, EFLAGS))]>,
+ TB, OpSize;
+def CMOVE32rr : I<0x44, MRMSrcReg, // if ==, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_E, EFLAGS))]>,
+ TB;
+def CMOVNE16rr: I<0x45, MRMSrcReg, // if !=, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_NE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNE32rr: I<0x45, MRMSrcReg, // if !=, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_NE, EFLAGS))]>,
+ TB;
+def CMOVBE16rr: I<0x46, MRMSrcReg, // if <=u, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_BE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVBE32rr: I<0x46, MRMSrcReg, // if <=u, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_BE, EFLAGS))]>,
+ TB;
+def CMOVA16rr : I<0x47, MRMSrcReg, // if >u, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_A, EFLAGS))]>,
+ TB, OpSize;
+def CMOVA32rr : I<0x47, MRMSrcReg, // if >u, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_A, EFLAGS))]>,
+ TB;
+def CMOVL16rr : I<0x4C, MRMSrcReg, // if <s, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_L, EFLAGS))]>,
+ TB, OpSize;
+def CMOVL32rr : I<0x4C, MRMSrcReg, // if <s, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_L, EFLAGS))]>,
+ TB;
+def CMOVGE16rr: I<0x4D, MRMSrcReg, // if >=s, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_GE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVGE32rr: I<0x4D, MRMSrcReg, // if >=s, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_GE, EFLAGS))]>,
+ TB;
+def CMOVLE16rr: I<0x4E, MRMSrcReg, // if <=s, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_LE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVLE32rr: I<0x4E, MRMSrcReg, // if <=s, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_LE, EFLAGS))]>,
+ TB;
+def CMOVG16rr : I<0x4F, MRMSrcReg, // if >s, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_G, EFLAGS))]>,
+ TB, OpSize;
+def CMOVG32rr : I<0x4F, MRMSrcReg, // if >s, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_G, EFLAGS))]>,
+ TB;
+def CMOVS16rr : I<0x48, MRMSrcReg, // if signed, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_S, EFLAGS))]>,
+ TB, OpSize;
+def CMOVS32rr : I<0x48, MRMSrcReg, // if signed, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_S, EFLAGS))]>,
+ TB;
+def CMOVNS16rr: I<0x49, MRMSrcReg, // if !signed, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovns\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_NS, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNS32rr: I<0x49, MRMSrcReg, // if !signed, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovns\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_NS, EFLAGS))]>,
+ TB;
+def CMOVP16rr : I<0x4A, MRMSrcReg, // if parity, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovp\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_P, EFLAGS))]>,
+ TB, OpSize;
+def CMOVP32rr : I<0x4A, MRMSrcReg, // if parity, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovp\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_P, EFLAGS))]>,
+ TB;
+def CMOVNP16rr : I<0x4B, MRMSrcReg, // if !parity, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_NP, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNP32rr : I<0x4B, MRMSrcReg, // if !parity, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_NP, EFLAGS))]>,
+ TB;
+def CMOVO16rr : I<0x40, MRMSrcReg, // if overflow, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_O, EFLAGS))]>,
+ TB, OpSize;
+def CMOVO32rr : I<0x40, MRMSrcReg, // if overflow, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_O, EFLAGS))]>,
+ TB;
+def CMOVNO16rr : I<0x41, MRMSrcReg, // if !overflow, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_NO, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNO32rr : I<0x41, MRMSrcReg, // if !overflow, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_NO, EFLAGS))]>,
+ TB;
+} // isCommutable = 1
+
+def CMOVB16rm : I<0x42, MRMSrcMem, // if <u, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_B, EFLAGS))]>,
+ TB, OpSize;
+def CMOVB32rm : I<0x42, MRMSrcMem, // if <u, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovb\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_B, EFLAGS))]>,
+ TB;
+def CMOVAE16rm: I<0x43, MRMSrcMem, // if >=u, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_AE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVAE32rm: I<0x43, MRMSrcMem, // if >=u, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovae\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_AE, EFLAGS))]>,
+ TB;
+def CMOVE16rm : I<0x44, MRMSrcMem, // if ==, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_E, EFLAGS))]>,
+ TB, OpSize;
+def CMOVE32rm : I<0x44, MRMSrcMem, // if ==, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmove\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_E, EFLAGS))]>,
+ TB;
+def CMOVNE16rm: I<0x45, MRMSrcMem, // if !=, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_NE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNE32rm: I<0x45, MRMSrcMem, // if !=, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovne\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_NE, EFLAGS))]>,
+ TB;
+def CMOVBE16rm: I<0x46, MRMSrcMem, // if <=u, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_BE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVBE32rm: I<0x46, MRMSrcMem, // if <=u, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovbe\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_BE, EFLAGS))]>,
+ TB;
+def CMOVA16rm : I<0x47, MRMSrcMem, // if >u, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_A, EFLAGS))]>,
+ TB, OpSize;
+def CMOVA32rm : I<0x47, MRMSrcMem, // if >u, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmova\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_A, EFLAGS))]>,
+ TB;
+def CMOVL16rm : I<0x4C, MRMSrcMem, // if <s, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_L, EFLAGS))]>,
+ TB, OpSize;
+def CMOVL32rm : I<0x4C, MRMSrcMem, // if <s, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovl\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_L, EFLAGS))]>,
+ TB;
+def CMOVGE16rm: I<0x4D, MRMSrcMem, // if >=s, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_GE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVGE32rm: I<0x4D, MRMSrcMem, // if >=s, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovge\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_GE, EFLAGS))]>,
+ TB;
+def CMOVLE16rm: I<0x4E, MRMSrcMem, // if <=s, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_LE, EFLAGS))]>,
+ TB, OpSize;
+def CMOVLE32rm: I<0x4E, MRMSrcMem, // if <=s, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovle\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_LE, EFLAGS))]>,
+ TB;
+def CMOVG16rm : I<0x4F, MRMSrcMem, // if >s, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_G, EFLAGS))]>,
+ TB, OpSize;
+def CMOVG32rm : I<0x4F, MRMSrcMem, // if >s, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovg\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_G, EFLAGS))]>,
+ TB;
+def CMOVS16rm : I<0x48, MRMSrcMem, // if signed, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_S, EFLAGS))]>,
+ TB, OpSize;
+def CMOVS32rm : I<0x48, MRMSrcMem, // if signed, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovs\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_S, EFLAGS))]>,
+ TB;
+def CMOVNS16rm: I<0x49, MRMSrcMem, // if !signed, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovns\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_NS, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNS32rm: I<0x49, MRMSrcMem, // if !signed, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovns\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_NS, EFLAGS))]>,
+ TB;
+def CMOVP16rm : I<0x4A, MRMSrcMem, // if parity, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovp\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_P, EFLAGS))]>,
+ TB, OpSize;
+def CMOVP32rm : I<0x4A, MRMSrcMem, // if parity, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovp\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_P, EFLAGS))]>,
+ TB;
+def CMOVNP16rm : I<0x4B, MRMSrcMem, // if !parity, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_NP, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNP32rm : I<0x4B, MRMSrcMem, // if !parity, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_NP, EFLAGS))]>,
+ TB;
+def CMOVO16rm : I<0x40, MRMSrcMem, // if overflow, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_O, EFLAGS))]>,
+ TB, OpSize;
+def CMOVO32rm : I<0x40, MRMSrcMem, // if overflow, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_O, EFLAGS))]>,
+ TB;
+def CMOVNO16rm : I<0x41, MRMSrcMem, // if !overflow, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_NO, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_NO, EFLAGS))]>,
+ TB;
+} // Uses = [EFLAGS]
+
+
+// Unary instructions.
+let CodeSize = 2 in {
+let Defs = [EFLAGS] in {
+def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src), "neg{b}\t$dst",
+ [(set GR8:$dst, (ineg GR8:$src)),
+ (implicit EFLAGS)]>;
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src), "neg{w}\t$dst",
+ [(set GR16:$dst, (ineg GR16:$src)),
+ (implicit EFLAGS)]>, OpSize;
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src), "neg{l}\t$dst",
+ [(set GR32:$dst, (ineg GR32:$src)),
+ (implicit EFLAGS)]>;
+let isTwoAddress = 0 in {
+ def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst",
+ [(store (ineg (loadi8 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+ def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w}\t$dst",
+ [(store (ineg (loadi16 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l}\t$dst",
+ [(store (ineg (loadi32 addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+}
+} // Defs = [EFLAGS]
+
+// Match xor -1 to not. Favors these over a move imm + xor to save code size.
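+// The raised AddedComplexity makes instruction selection prefer these
+// single-instruction patterns over materializing the all-ones immediate and
+// xor'ing with it.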
+let AddedComplexity = 15 in {
+def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b}\t$dst",
+ [(set GR8:$dst, (not GR8:$src))]>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w}\t$dst",
+ [(set GR16:$dst, (not GR16:$src))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l}\t$dst",
+ [(set GR32:$dst, (not GR32:$src))]>;
+}
+let isTwoAddress = 0 in {
+ def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst",
+ [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
+ def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w}\t$dst",
+ [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
+ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l}\t$dst",
+ [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
+}
+} // CodeSize
+
+// TODO: inc/dec is slow for P4, but fast for Pentium-M.
+let Defs = [EFLAGS] in {
+let CodeSize = 2 in
+def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst",
+ [(set GR8:$dst, (add GR8:$src, 1)),
+ (implicit EFLAGS)]>;
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst",
+ [(set GR16:$dst, (add GR16:$src, 1)),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "inc{l}\t$dst",
+ [(set GR32:$dst, (add GR32:$src, 1)),
+ (implicit EFLAGS)]>, Requires<[In32BitMode]>;
+}
+let isTwoAddress = 0, CodeSize = 2 in {
+ def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+}
+
+let CodeSize = 2 in
+def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst",
+ [(set GR8:$dst, (add GR8:$src, -1)),
+ (implicit EFLAGS)]>;
+let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), "dec{w}\t$dst",
+ [(set GR16:$dst, (add GR16:$src, -1)),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst",
+ [(set GR32:$dst, (add GR32:$src, -1)),
+ (implicit EFLAGS)]>, Requires<[In32BitMode]>;
+}
+
+let isTwoAddress = 0, CodeSize = 2 in {
+ def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
+ [(store (add (loadi8 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>;
+ def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In32BitMode]>;
+ def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In32BitMode]>;
+}
+} // Defs = [EFLAGS]
+
+// Logical operators...
+let Defs = [EFLAGS] in {
+let isCommutable = 1 in { // X = AND Y, Z --> X = AND Z, Y
+def AND8rr : I<0x20, MRMDestReg,
+ (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
+ "and{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (and GR8:$src1, GR8:$src2)),
+ (implicit EFLAGS)]>;
+def AND16rr : I<0x21, MRMDestReg,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "and{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (and GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def AND32rr : I<0x21, MRMDestReg,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "and{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>;
+}
+
+def AND8rm : I<0x22, MRMSrcMem,
+ (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
+ "and{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (and GR8:$src1, (loadi8 addr:$src2))),
+ (implicit EFLAGS)]>;
+def AND16rm : I<0x23, MRMSrcMem,
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "and{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (and GR16:$src1, (loadi16 addr:$src2))),
+ (implicit EFLAGS)]>, OpSize;
+def AND32rm : I<0x23, MRMSrcMem,
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "and{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, (loadi32 addr:$src2))),
+ (implicit EFLAGS)]>;
+
+def AND8ri : Ii8<0x80, MRM4r,
+ (outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2),
+ "and{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (and GR8:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def AND16ri : Ii16<0x81, MRM4r,
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "and{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (and GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def AND32ri : Ii32<0x81, MRM4r,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "and{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def AND16ri8 : Ii8<0x83, MRM4r,
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "and{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (and GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>,
+ OpSize;
+def AND32ri8 : Ii8<0x83, MRM4r,
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "and{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (and GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 0 in {
+ def AND8mr : I<0x20, MRMDestMem,
+ (outs), (ins i8mem :$dst, GR8 :$src),
+ "and{b}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def AND16mr : I<0x21, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src),
+ "and{w}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def AND32mr : I<0x21, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src),
+ "and{l}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), GR32:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def AND8mi : Ii8<0x80, MRM4m,
+ (outs), (ins i8mem :$dst, i8imm :$src),
+ "and{b}\t{$src, $dst|$dst, $src}",
+ [(store (and (loadi8 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def AND16mi : Ii16<0x81, MRM4m,
+ (outs), (ins i16mem:$dst, i16imm:$src),
+ "and{w}\t{$src, $dst|$dst, $src}",
+ [(store (and (loadi16 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def AND32mi : Ii32<0x81, MRM4m,
+ (outs), (ins i32mem:$dst, i32imm:$src),
+ "and{l}\t{$src, $dst|$dst, $src}",
+ [(store (and (loadi32 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def AND16mi8 : Ii8<0x83, MRM4m,
+ (outs), (ins i16mem:$dst, i16i8imm :$src),
+ "and{w}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), i16immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def AND32mi8 : Ii8<0x83, MRM4m,
+ (outs), (ins i32mem:$dst, i32i8imm :$src),
+ "and{l}\t{$src, $dst|$dst, $src}",
+ [(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+}
+
+
+let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y
+def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
+ "or{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (or GR8:$src1, GR8:$src2)),
+ (implicit EFLAGS)]>;
+def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "or{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (or GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "or{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>;
+}
+def OR8rm : I<0x0A, MRMSrcMem , (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
+ "or{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (or GR8:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def OR16rm : I<0x0B, MRMSrcMem , (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "or{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (or GR16:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, OpSize;
+def OR32rm : I<0x0B, MRMSrcMem , (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "or{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+
+def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "or{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (or GR8:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "or{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (or GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "or{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+
+def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "or{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (or GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "or{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (or GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+let isTwoAddress = 0 in {
+ def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
+ "or{b}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def OR16mr : I<0x09, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "or{w}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def OR32mr : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "or{l}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), GR32:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def OR8mi : Ii8<0x80, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "or{b}\t{$src, $dst|$dst, $src}",
+ [(store (or (loadi8 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def OR16mi : Ii16<0x81, MRM1m, (outs), (ins i16mem:$dst, i16imm:$src),
+ "or{w}\t{$src, $dst|$dst, $src}",
+ [(store (or (loadi16 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def OR32mi : Ii32<0x81, MRM1m, (outs), (ins i32mem:$dst, i32imm:$src),
+ "or{l}\t{$src, $dst|$dst, $src}",
+ [(store (or (loadi32 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def OR16mi8 : Ii8<0x83, MRM1m, (outs), (ins i16mem:$dst, i16i8imm:$src),
+ "or{w}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), i16immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def OR32mi8 : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$src),
+ "or{l}\t{$src, $dst|$dst, $src}",
+ [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+} // isTwoAddress = 0
+
+
+let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
+ def XOR8rr : I<0x30, MRMDestReg,
+ (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
+ "xor{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)),
+ (implicit EFLAGS)]>;
+ def XOR16rr : I<0x31, MRMDestReg,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "xor{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+ def XOR32rr : I<0x31, MRMDestReg,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "xor{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>;
+} // isCommutable = 1
+
+def XOR8rm : I<0x32, MRMSrcMem ,
+ (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2),
+ "xor{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def XOR16rm : I<0x33, MRMSrcMem ,
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "xor{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>,
+ OpSize;
+def XOR32rm : I<0x33, MRMSrcMem ,
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "xor{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+
+def XOR8ri : Ii8<0x80, MRM6r,
+ (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "xor{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (xor GR8:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def XOR16ri : Ii16<0x81, MRM6r,
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "xor{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def XOR32ri : Ii32<0x81, MRM6r,
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "xor{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def XOR16ri8 : Ii8<0x83, MRM6r,
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "xor{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>,
+ OpSize;
+def XOR32ri8 : Ii8<0x83, MRM6r,
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "xor{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 0 in {
+ def XOR8mr : I<0x30, MRMDestMem,
+ (outs), (ins i8mem :$dst, GR8 :$src),
+ "xor{b}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), GR8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def XOR16mr : I<0x31, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src),
+ "xor{w}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), GR16:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def XOR32mr : I<0x31, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src),
+ "xor{l}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), GR32:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def XOR8mi : Ii8<0x80, MRM6m,
+ (outs), (ins i8mem :$dst, i8imm :$src),
+ "xor{b}\t{$src, $dst|$dst, $src}",
+ [(store (xor (loadi8 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def XOR16mi : Ii16<0x81, MRM6m,
+ (outs), (ins i16mem:$dst, i16imm:$src),
+ "xor{w}\t{$src, $dst|$dst, $src}",
+ [(store (xor (loadi16 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def XOR32mi : Ii32<0x81, MRM6m,
+ (outs), (ins i32mem:$dst, i32imm:$src),
+ "xor{l}\t{$src, $dst|$dst, $src}",
+ [(store (xor (loadi32 addr:$dst), imm:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+ def XOR16mi8 : Ii8<0x83, MRM6m,
+ (outs), (ins i16mem:$dst, i16i8imm :$src),
+ "xor{w}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), i16immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize;
+ def XOR32mi8 : Ii8<0x83, MRM6m,
+ (outs), (ins i32mem:$dst, i32i8imm :$src),
+ "xor{l}\t{$src, $dst|$dst, $src}",
+ [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst),
+ (implicit EFLAGS)]>;
+} // isTwoAddress = 0
+} // Defs = [EFLAGS]
+
+// Shift instructions
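+// Variable-count shifts take their count implicitly in CL; that is modeled
+// below with "let Uses = [CL]" and by naming CL directly in the patterns
+// instead of exposing the count as an instruction operand.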
+let Defs = [EFLAGS] in {
+let Uses = [CL] in {
+def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src),
+ "shl{b}\t{%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (shl GR8:$src, CL))]>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src),
+ "shl{w}\t{%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src),
+ "shl{l}\t{%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (shl GR32:$src, CL))]>;
+} // Uses = [CL]
+
+def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "shl{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shl{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shl{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
+// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is
+// cheaper.
+} // isConvertibleToThreeAddress = 1
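+// As an illustration of the NOTE above (a sketch only, not a definition in
+// this patch), the shift-by-one case could be matched onto ADD32rr, which is
+// defined with the arithmetic instructions below:
+//   def : Pat<(shl GR32:$src, (i8 1)), (ADD32rr GR32:$src, GR32:$src)>;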
+
+let isTwoAddress = 0 in {
+ let Uses = [CL] in {
+ def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
+ def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t{%cl, $dst|$dst, %CL}",
+ [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
+ }
+ def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shl{b}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shl{w}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shl{l}\t{$src, $dst|$dst, $src}",
+ [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Shift by 1
+ def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
+ "shl{b}\t$dst",
+ [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
+ "shl{w}\t$dst",
+ [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+ def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
+ "shl{l}\t$dst",
+ [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+let Uses = [CL] in {
+def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src),
+ "shr{b}\t{%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (srl GR8:$src, CL))]>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src),
+ "shr{w}\t{%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src),
+ "shr{l}\t{%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (srl GR32:$src, CL))]>;
+}
+
+def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "shr{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
+def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "shr{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "shr{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
+
+// Shift by 1
+def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
+ "shr{b}\t$dst",
+ [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
+def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
+ "shr{w}\t$dst",
+ [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
+def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
+ "shr{l}\t$dst",
+ [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
+
+let isTwoAddress = 0 in {
+ let Uses = [CL] in {
+ def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
+ def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
+ OpSize;
+ def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t{%cl, $dst|$dst, %CL}",
+ [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
+ }
+ def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "shr{b}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "shr{w}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "shr{l}\t{$src, $dst|$dst, $src}",
+ [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Shift by 1
+ def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
+ "shr{b}\t$dst",
+ [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
+ "shr{w}\t$dst",
+ [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
+ def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
+ "shr{l}\t$dst",
+ [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+let Uses = [CL] in {
+def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src),
+ "sar{b}\t{%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (sra GR8:$src, CL))]>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src),
+ "sar{w}\t{%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src),
+ "sar{l}\t{%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (sra GR32:$src, CL))]>;
+}
+
+def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "sar{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
+def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "sar{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
+ OpSize;
+def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "sar{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
+
+// Shift by 1
+def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "sar{b}\t$dst",
+ [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
+def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
+ "sar{w}\t$dst",
+ [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
+def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
+ "sar{l}\t$dst",
+ [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
+
+let isTwoAddress = 0 in {
+ let Uses = [CL] in {
+ def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
+ def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t{%cl, $dst|$dst, %CL}",
+ [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
+ }
+ def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "sar{b}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "sar{w}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "sar{l}\t{$src, $dst|$dst, $src}",
+ [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Shift by 1
+ def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
+ "sar{b}\t$dst",
+ [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
+ "sar{w}\t$dst",
+ [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+ def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
+ "sar{l}\t$dst",
+ [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+// Rotate instructions
+// FIXME: provide shorter instructions when imm8 == 1
+let Uses = [CL] in {
+def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
+ "rol{b}\t{%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (rotl GR8:$src, CL))]>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src),
+ "rol{w}\t{%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src),
+ "rol{l}\t{%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (rotl GR32:$src, CL))]>;
+}
+
+def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "rol{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
+def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "rol{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "rol{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "rol{b}\t$dst",
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
+def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "rol{w}\t$dst",
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
+def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "rol{l}\t$dst",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
+
+let isTwoAddress = 0 in {
+ let Uses = [CL] in {
+ def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
+ def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
+ }
+ def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "rol{b}\t{$src, $dst|$dst, $src}",
+ [(store (rotl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "rol{w}\t{$src, $dst|$dst, $src}",
+ [(store (rotl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "rol{l}\t{$src, $dst|$dst, $src}",
+ [(store (rotl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Rotate by 1
+ def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
+ "rol{b}\t$dst",
+ [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
+ "rol{w}\t$dst",
+ [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+ def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
+ "rol{l}\t$dst",
+ [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+let Uses = [CL] in {
+def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src),
+ "ror{b}\t{%cl, $dst|$dst, %CL}",
+ [(set GR8:$dst, (rotr GR8:$src, CL))]>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src),
+ "ror{w}\t{%cl, $dst|$dst, %CL}",
+ [(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src),
+ "ror{l}\t{%cl, $dst|$dst, %CL}",
+ [(set GR32:$dst, (rotr GR32:$src, CL))]>;
+}
+
+def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+ "ror{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
+def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+ "ror{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+ "ror{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
+
+// Rotate by 1
+def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "ror{b}\t$dst",
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
+def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "ror{w}\t$dst",
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
+def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "ror{l}\t$dst",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
+
+let isTwoAddress = 0 in {
+ let Uses = [CL] in {
+ def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
+ def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t{%cl, $dst|$dst, %CL}",
+ [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
+ }
+ def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
+ "ror{b}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
+ "ror{w}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ OpSize;
+ def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
+ "ror{l}\t{$src, $dst|$dst, $src}",
+ [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+
+ // Rotate by 1
+ def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
+ "ror{b}\t$dst",
+ [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
+ "ror{w}\t$dst",
+ [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ OpSize;
+ def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
+ "ror{l}\t$dst",
+ [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+}
+
+
+
+// Double shift instructions (generalizations of rotate)
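+// shld dst, src, cnt computes (dst << cnt) | (src >> (size - cnt)) and
+// shrd dst, src, cnt computes (dst >> cnt) | (src << (size - cnt)), where
+// size is the operand width; this is why the immediate forms below can be
+// commuted into one another.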
+let Uses = [CL] in {
+def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
+def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+ TB, OpSize;
+}
+
+let isCommutable = 1 in { // These instructions commute to each other.
+def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
+ (i8 imm:$src3)))]>,
+ TB;
+def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
+ (i8 imm:$src3)))]>,
+ TB, OpSize;
+}
+
+let isTwoAddress = 0 in {
+ let Uses = [CL] in {
+ def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+ def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
+ addr:$dst)]>, TB;
+ }
+ def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+ def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB;
+
+ let Uses = [CL] in {
+ def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+ def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
+ addr:$dst)]>, TB, OpSize;
+ }
+ def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+ def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
+ (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
+ (i8 imm:$src3)), addr:$dst)]>,
+ TB, OpSize;
+}
+} // Defs = [EFLAGS]
+
+
+// Arithmetic.
+let Defs = [EFLAGS] in {
+let isCommutable = 1 in { // X = ADD Y, Z --> X = ADD Z, Y
+// Register-Register Addition
+def ADD8rr : I<0x00, MRMDestReg, (outs GR8 :$dst),
+ (ins GR8 :$src1, GR8 :$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (add GR8:$src1, GR8:$src2)),
+ (implicit EFLAGS)]>;
+
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+// Register-Register Addition
+def ADD16rr : I<0x01, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (add GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>;
+} // end isConvertibleToThreeAddress
+} // end isCommutable
+
+// Register-Memory Addition
+def ADD8rm : I<0x02, MRMSrcMem, (outs GR8 :$dst),
+ (ins GR8 :$src1, i8mem :$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def ADD16rm : I<0x03, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, OpSize;
+def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+
+// Register-Integer Addition
+def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (add GR8:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+// Register-Integer Addition
+def ADD16ri : Ii16<0x81, MRM0r, (outs GR16:$dst),
+ (ins GR16:$src1, i16imm:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (add GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def ADD32ri : Ii32<0x81, MRM0r, (outs GR32:$dst),
+ (ins GR32:$src1, i32imm:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst),
+ (ins GR16:$src1, i16i8imm:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (add GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
+ (ins GR32:$src1, i32i8imm:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (add GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+}
+
+let isTwoAddress = 0 in {
+ // Memory-Register Addition
+ def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), GR8:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+ def ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), GR16:$src2), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), GR32:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+ def ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+ def ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+ def ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "add{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "add{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (add (load addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+}
+
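+// ADC (and SBB below) consume the carry flag, so their patterns use the
+// adde/sube nodes and the definitions live under "Uses = [EFLAGS]". For
+// example, an i64 add on x86-32 is lowered to ADD32rr for the low half
+// followed by ADC32rr for the high half.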
+let Uses = [EFLAGS] in {
+let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y
+def ADC8rr : I<0x10, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "adc{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (adde GR8:$src1, GR8:$src2))]>;
+def ADC16rr : I<0x11, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (adde GR16:$src1, GR16:$src2))]>, OpSize;
+def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
+}
+def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst),
+ (ins GR8:$src1, i8mem:$src2),
+ "adc{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2)))]>;
+def ADC16rm : I<0x13, MRMSrcMem , (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2)))]>,
+ OpSize;
+def ADC32rm : I<0x13, MRMSrcMem , (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, (load addr:$src2)))]>;
+def ADC8ri : Ii8<0x80, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "adc{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (adde GR8:$src1, imm:$src2))]>;
+def ADC16ri : Ii16<0x81, MRM2r, (outs GR16:$dst),
+ (ins GR16:$src1, i16imm:$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (adde GR16:$src1, imm:$src2))]>, OpSize;
+def ADC16ri8 : Ii8<0x83, MRM2r, (outs GR16:$dst),
+ (ins GR16:$src1, i16i8imm:$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (adde GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def ADC32ri : Ii32<0x81, MRM2r, (outs GR32:$dst),
+ (ins GR32:$src1, i32imm:$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, imm:$src2))]>;
+def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst),
+ (ins GR32:$src1, i32i8imm:$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
+
+let isTwoAddress = 0 in {
+ def ADC8mr : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ "adc{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>;
+ def ADC16mr : I<0x11, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), GR16:$src2), addr:$dst)]>,
+ OpSize;
+ def ADC32mr : I<0x11, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), GR32:$src2), addr:$dst)]>;
+ def ADC8mi : Ii8<0x80, MRM2m, (outs), (ins i8mem:$dst, i8imm:$src2),
+ "adc{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
+ def ADC16mi : Ii16<0x81, MRM2m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
+ OpSize;
+ def ADC16mi8 : Ii8<0x83, MRM2m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "adc{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
+ OpSize;
+ def ADC32mi : Ii32<0x81, MRM2m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
+ def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "adc{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+}
+} // Uses = [EFLAGS]
+
+// Register-Register Subtraction
+def SUB8rr : I<0x28, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "sub{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)),
+ (implicit EFLAGS)]>;
+def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>;
+
+// Register-Memory Subtraction
+def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
+ (ins GR8 :$src1, i8mem :$src2),
+ "sub{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, OpSize;
+def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+
+// Register-Integer Subtraction
+def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst),
+ (ins GR8:$src1, i8imm:$src2),
+ "sub{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sub GR8:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst),
+ (ins GR16:$src1, i16imm:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst),
+ (ins GR32:$src1, i32imm:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst),
+ (ins GR16:$src1, i16i8imm:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
+ (ins GR32:$src1, i32i8imm:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+
+let isTwoAddress = 0 in {
+ // Memory-Register Subtraction
+ def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
+ "sub{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), GR8:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+ def SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), GR16:$src2), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), GR32:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+
+ // Memory-Integer Subtraction
+ def SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
+ "sub{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+ def SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (loadi16 addr:$dst), imm:$src2),addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (loadi32 addr:$dst), imm:$src2),addr:$dst),
+ (implicit EFLAGS)]>;
+ def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (sub (load addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>;
+}
+
+let Uses = [EFLAGS] in {
+def SBB8rr : I<0x18, MRMDestReg, (outs GR8:$dst),
+ (ins GR8:$src1, GR8:$src2),
+ "sbb{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sube GR8:$src1, GR8:$src2))]>;
+def SBB16rr : I<0x19, MRMDestReg, (outs GR16:$dst),
+ (ins GR16:$src1, GR16:$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sube GR16:$src1, GR16:$src2))]>, OpSize;
+def SBB32rr : I<0x19, MRMDestReg, (outs GR32:$dst),
+ (ins GR32:$src1, GR32:$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
+
+let isTwoAddress = 0 in {
+ def SBB8mr : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+ "sbb{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>;
+ def SBB16mr : I<0x19, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), GR16:$src2), addr:$dst)]>,
+ OpSize;
+ def SBB32mr : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>;
+  def SBB8mi : Ii8<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2),
+ "sbb{b}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
+ def SBB16mi : Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
+ OpSize;
+ def SBB16mi8 : Ii8<0x83, MRM3m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
+ OpSize;
+ def SBB32mi : Ii32<0x81, MRM3m, (outs), (ins i32mem:$dst, i32imm:$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
+ def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+}
+def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
+ "sbb{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2)))]>;
+def SBB16rm : I<0x1B, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2)))]>,
+ OpSize;
+def SBB32rm : I<0x1B, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, (load addr:$src2)))]>;
+def SBB8ri : Ii8<0x80, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "sbb{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sube GR8:$src1, imm:$src2))]>;
+def SBB16ri : Ii16<0x81, MRM3r, (outs GR16:$dst),
+ (ins GR16:$src1, i16imm:$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sube GR16:$src1, imm:$src2))]>, OpSize;
+def SBB16ri8 : Ii8<0x83, MRM3r, (outs GR16:$dst),
+ (ins GR16:$src1, i16i8imm:$src2),
+ "sbb{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sube GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
+def SBB32ri : Ii32<0x81, MRM3r, (outs GR32:$dst),
+ (ins GR32:$src1, i32imm:$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, imm:$src2))]>;
+def SBB32ri8 : Ii8<0x83, MRM3r, (outs GR32:$dst),
+ (ins GR32:$src1, i32i8imm:$src2),
+ "sbb{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sube GR32:$src1, i32immSExt8:$src2))]>;
+} // Uses = [EFLAGS]
+} // Defs = [EFLAGS]
+
+let Defs = [EFLAGS] in {
+let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+// Register-Register Signed Integer Multiply
+def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (mul GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, TB, OpSize;
+def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>, TB;
+}
+
+// Register-Memory Signed Integer Multiply
+def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "imul{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, TB, OpSize;
+def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "imul{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, TB;
+} // Defs = [EFLAGS]
+} // end Two Address instructions
+
+// Surprisingly enough, these are not two-address instructions!
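+// They use the three-operand imul encodings (opcodes 0x69 and 0x6B), so the
+// destination register need not match either source operand.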
+let Defs = [EFLAGS] in {
+// Register-Integer Signed Integer Multiply
+def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, (mul GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
+ (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
+ (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
+ (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+
+// Memory-Integer Signed Integer Multiply
+def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
+ (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, (mul (load addr:$src1), imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
+ (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (mul (load addr:$src1), imm:$src2)),
+ (implicit EFLAGS)]>;
+def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
+ (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, (mul (load addr:$src1),
+ i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
+ (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (mul (load addr:$src1),
+ i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+//===----------------------------------------------------------------------===//
+// Test instructions are just like AND, except they don't generate a result.
+//
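+// For example, comparing a register against zero is normally done by testing
+// the register against itself: the flag results match a compare with zero,
+// the encoding is shorter, and no destination register is clobbered.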
+let Defs = [EFLAGS] in {
+let isCommutable = 1 in { // TEST X, Y --> TEST Y, X
+def TEST8rr : I<0x84, MRMDestReg, (outs), (ins GR8:$src1, GR8:$src2),
+ "test{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and_su GR8:$src1, GR8:$src2), 0),
+ (implicit EFLAGS)]>;
+def TEST16rr : I<0x85, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "test{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and_su GR16:$src1, GR16:$src2), 0),
+ (implicit EFLAGS)]>,
+ OpSize;
+def TEST32rr : I<0x85, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "test{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and_su GR32:$src1, GR32:$src2), 0),
+ (implicit EFLAGS)]>;
+}
+
+def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2),
+ "test{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR8:$src1, (loadi8 addr:$src2)), 0),
+ (implicit EFLAGS)]>;
+def TEST16rm : I<0x85, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2),
+ "test{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR16:$src1, (loadi16 addr:$src2)), 0),
+ (implicit EFLAGS)]>, OpSize;
+def TEST32rm : I<0x85, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2),
+ "test{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and GR32:$src1, (loadi32 addr:$src2)), 0),
+ (implicit EFLAGS)]>;
+
+def TEST8ri : Ii8 <0xF6, MRM0r, // flags = GR8 & imm8
+ (outs), (ins GR8:$src1, i8imm:$src2),
+ "test{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and_su GR8:$src1, imm:$src2), 0),
+ (implicit EFLAGS)]>;
+def TEST16ri : Ii16<0xF7, MRM0r, // flags = GR16 & imm16
+ (outs), (ins GR16:$src1, i16imm:$src2),
+ "test{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and_su GR16:$src1, imm:$src2), 0),
+ (implicit EFLAGS)]>, OpSize;
+def TEST32ri : Ii32<0xF7, MRM0r, // flags = GR32 & imm32
+ (outs), (ins GR32:$src1, i32imm:$src2),
+ "test{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and_su GR32:$src1, imm:$src2), 0),
+ (implicit EFLAGS)]>;
+
+def TEST8mi : Ii8 <0xF6, MRM0m, // flags = [mem8] & imm8
+ (outs), (ins i8mem:$src1, i8imm:$src2),
+ "test{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and (loadi8 addr:$src1), imm:$src2), 0),
+ (implicit EFLAGS)]>;
+def TEST16mi : Ii16<0xF7, MRM0m, // flags = [mem16] & imm16
+ (outs), (ins i16mem:$src1, i16imm:$src2),
+ "test{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and (loadi16 addr:$src1), imm:$src2), 0),
+ (implicit EFLAGS)]>, OpSize;
+def TEST32mi : Ii32<0xF7, MRM0m, // flags = [mem32] & imm32
+ (outs), (ins i32mem:$src1, i32imm:$src2),
+ "test{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (and (loadi32 addr:$src1), imm:$src2), 0),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+
+// Condition code ops, incl. set if equal/not equal/...
+let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in
+def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>; // flags = AH
+let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
+def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
+
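+// Each SETcc writes 1 to its 8-bit destination if the condition holds and 0
+// otherwise; the "r" forms target a GR8 and the "m" forms store the byte
+// directly to memory.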
+let Uses = [EFLAGS] in {
+def SETEr : I<0x94, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "sete\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_E, EFLAGS))]>,
+ TB; // GR8 = ==
+def SETEm : I<0x94, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "sete\t$dst",
+ [(store (X86setcc X86_COND_E, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = ==
+
+def SETNEr : I<0x95, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setne\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_NE, EFLAGS))]>,
+ TB; // GR8 = !=
+def SETNEm : I<0x95, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setne\t$dst",
+ [(store (X86setcc X86_COND_NE, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = !=
+
+def SETLr : I<0x9C, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setl\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_L, EFLAGS))]>,
+ TB; // GR8 = < signed
+def SETLm : I<0x9C, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setl\t$dst",
+ [(store (X86setcc X86_COND_L, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = < signed
+
+def SETGEr : I<0x9D, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setge\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_GE, EFLAGS))]>,
+ TB; // GR8 = >= signed
+def SETGEm : I<0x9D, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setge\t$dst",
+ [(store (X86setcc X86_COND_GE, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = >= signed
+
+def SETLEr : I<0x9E, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setle\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_LE, EFLAGS))]>,
+ TB; // GR8 = <= signed
+def SETLEm : I<0x9E, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setle\t$dst",
+ [(store (X86setcc X86_COND_LE, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = <= signed
+
+def SETGr : I<0x9F, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setg\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_G, EFLAGS))]>,
+ TB; // GR8 = > signed
+def SETGm : I<0x9F, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setg\t$dst",
+ [(store (X86setcc X86_COND_G, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = > signed
+
+def SETBr : I<0x92, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setb\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_B, EFLAGS))]>,
+               TB; // GR8 = < unsigned
+def SETBm : I<0x92, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setb\t$dst",
+ [(store (X86setcc X86_COND_B, EFLAGS), addr:$dst)]>,
+               TB; // [mem8] = < unsigned
+
+def SETAEr : I<0x93, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setae\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_AE, EFLAGS))]>,
+               TB; // GR8 = >= unsigned
+def SETAEm : I<0x93, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setae\t$dst",
+ [(store (X86setcc X86_COND_AE, EFLAGS), addr:$dst)]>,
+               TB; // [mem8] = >= unsigned
+
+def SETBEr : I<0x96, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setbe\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_BE, EFLAGS))]>,
+               TB; // GR8 = <= unsigned
+def SETBEm : I<0x96, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setbe\t$dst",
+ [(store (X86setcc X86_COND_BE, EFLAGS), addr:$dst)]>,
+               TB; // [mem8] = <= unsigned
+
+def SETAr : I<0x97, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "seta\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_A, EFLAGS))]>,
+               TB; // GR8 = > unsigned
+def SETAm : I<0x97, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "seta\t$dst",
+ [(store (X86setcc X86_COND_A, EFLAGS), addr:$dst)]>,
+               TB; // [mem8] = > unsigned
+
+def SETSr : I<0x98, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "sets\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_S, EFLAGS))]>,
+ TB; // GR8 = <sign bit>
+def SETSm : I<0x98, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "sets\t$dst",
+ [(store (X86setcc X86_COND_S, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = <sign bit>
+def SETNSr : I<0x99, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setns\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_NS, EFLAGS))]>,
+ TB; // GR8 = !<sign bit>
+def SETNSm : I<0x99, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setns\t$dst",
+ [(store (X86setcc X86_COND_NS, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = !<sign bit>
+
+def SETPr : I<0x9A, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setp\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_P, EFLAGS))]>,
+ TB; // GR8 = parity
+def SETPm : I<0x9A, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setp\t$dst",
+ [(store (X86setcc X86_COND_P, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = parity
+def SETNPr : I<0x9B, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setnp\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_NP, EFLAGS))]>,
+ TB; // GR8 = not parity
+def SETNPm : I<0x9B, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setnp\t$dst",
+ [(store (X86setcc X86_COND_NP, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = not parity
+
+def SETOr : I<0x90, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "seto\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_O, EFLAGS))]>,
+ TB; // GR8 = overflow
+def SETOm : I<0x90, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "seto\t$dst",
+ [(store (X86setcc X86_COND_O, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = overflow
+def SETNOr : I<0x91, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setno\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_NO, EFLAGS))]>,
+ TB; // GR8 = not overflow
+def SETNOm : I<0x91, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setno\t$dst",
+ [(store (X86setcc X86_COND_NO, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = not overflow
+} // Uses = [EFLAGS]
+
+
+// Integer comparisons
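+// CMP computes $src1 - $src2 only for its effect on EFLAGS; the numeric
+// result is discarded, which is why these definitions have no outs.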
+let Defs = [EFLAGS] in {
+def CMP8rr : I<0x38, MRMDestReg,
+ (outs), (ins GR8 :$src1, GR8 :$src2),
+ "cmp{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR8:$src1, GR8:$src2), (implicit EFLAGS)]>;
+def CMP16rr : I<0x39, MRMDestReg,
+ (outs), (ins GR16:$src1, GR16:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR16:$src1, GR16:$src2), (implicit EFLAGS)]>, OpSize;
+def CMP32rr : I<0x39, MRMDestReg,
+ (outs), (ins GR32:$src1, GR32:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR32:$src1, GR32:$src2), (implicit EFLAGS)]>;
+def CMP8mr : I<0x38, MRMDestMem,
+ (outs), (ins i8mem :$src1, GR8 :$src2),
+ "cmp{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi8 addr:$src1), GR8:$src2),
+ (implicit EFLAGS)]>;
+def CMP16mr : I<0x39, MRMDestMem,
+ (outs), (ins i16mem:$src1, GR16:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi16 addr:$src1), GR16:$src2),
+ (implicit EFLAGS)]>, OpSize;
+def CMP32mr : I<0x39, MRMDestMem,
+ (outs), (ins i32mem:$src1, GR32:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi32 addr:$src1), GR32:$src2),
+ (implicit EFLAGS)]>;
+def CMP8rm : I<0x3A, MRMSrcMem,
+ (outs), (ins GR8 :$src1, i8mem :$src2),
+ "cmp{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR8:$src1, (loadi8 addr:$src2)),
+ (implicit EFLAGS)]>;
+def CMP16rm : I<0x3B, MRMSrcMem,
+ (outs), (ins GR16:$src1, i16mem:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def CMP32rm : I<0x3B, MRMSrcMem,
+ (outs), (ins GR32:$src1, i32mem:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)]>;
+def CMP8ri : Ii8<0x80, MRM7r,
+ (outs), (ins GR8:$src1, i8imm:$src2),
+ "cmp{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR8:$src1, imm:$src2), (implicit EFLAGS)]>;
+def CMP16ri : Ii16<0x81, MRM7r,
+ (outs), (ins GR16:$src1, i16imm:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR16:$src1, imm:$src2),
+ (implicit EFLAGS)]>, OpSize;
+def CMP32ri : Ii32<0x81, MRM7r,
+ (outs), (ins GR32:$src1, i32imm:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR32:$src1, imm:$src2), (implicit EFLAGS)]>;
+def CMP8mi : Ii8 <0x80, MRM7m,
+ (outs), (ins i8mem :$src1, i8imm :$src2),
+ "cmp{b}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi8 addr:$src1), imm:$src2),
+ (implicit EFLAGS)]>;
+def CMP16mi : Ii16<0x81, MRM7m,
+ (outs), (ins i16mem:$src1, i16imm:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi16 addr:$src1), imm:$src2),
+ (implicit EFLAGS)]>, OpSize;
+def CMP32mi : Ii32<0x81, MRM7m,
+ (outs), (ins i32mem:$src1, i32imm:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi32 addr:$src1), imm:$src2),
+ (implicit EFLAGS)]>;
+def CMP16ri8 : Ii8<0x83, MRM7r,
+ (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)]>, OpSize;
+def CMP16mi8 : Ii8<0x83, MRM7m,
+ (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "cmp{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi16 addr:$src1), i16immSExt8:$src2),
+ (implicit EFLAGS)]>, OpSize;
+def CMP32mi8 : Ii8<0x83, MRM7m,
+ (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp (loadi32 addr:$src1), i32immSExt8:$src2),
+ (implicit EFLAGS)]>;
+def CMP32ri8 : Ii8<0x83, MRM7r,
+ (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "cmp{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Bit tests.
+// TODO: BTC, BTR, and BTS
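+// bt copies the bit of the first operand selected by the second operand into
+// the carry flag.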
+let Defs = [EFLAGS] in {
+def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)]>, OpSize, TB;
+def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)]>, TB;
+
+// Unlike with the register+register form, the memory+register form of the
+// bt instruction does not ignore the high bits of the index. From ISel's
+// perspective, this is pretty bizarre. Disable these instructions for now.
+//def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+// "bt{w}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi16 addr:$src1), GR16:$src2),
+// (implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>;
+//def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+// "bt{l}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi32 addr:$src1), GR32:$src2),
+// (implicit EFLAGS)]>, TB, Requires<[FastBTMem]>;
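+// (With a memory first operand and a register index, bt treats memory as a
+// bit string, so a large index can select a bit outside the addressed word;
+// the register form instead masks the index to the operand width, which is
+// the behavior ISel expects.)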
+
+def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)]>, OpSize, TB;
+def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)]>, TB;
+// Note that these instructions don't need FastBTMem because that
+// only applies when the other operand is in a register. When it's
+// an immediate, bt is still fast.
+def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt (loadi16 addr:$src1), i16immSExt8:$src2),
+ (implicit EFLAGS)]>, OpSize, TB;
+def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt (loadi32 addr:$src1), i32immSExt8:$src2),
+ (implicit EFLAGS)]>, TB;
+} // Defs = [EFLAGS]
+
+// Sign/Zero extenders
+// Use movsbl instead of movsbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update.
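+// E.g. "movsbw %al, %cx" writes only the low 16 bits of ECX, leaving a false
+// dependence on the register's old upper bits, while "movsbl %al, %ecx"
+// rewrites the whole register.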
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
+ "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR16:$dst, (sext GR8:$src))]>, TB;
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
+ "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR8:$src))]>, TB;
+def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movs{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
+def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sext GR16:$src))]>, TB;
+def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movs{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+
+// Use movzbl instead of movzbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update.
+def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
+ "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR16:$dst, (zext GR8:$src))]>, TB;
+def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
+ "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
+def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR8:$src))]>, TB;
+def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
+def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zext GR16:$src))]>, TB;
+def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+ "movz{wl|x}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
+
+// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
+// except that they use GR32_NOREX for the output operand register class
+// instead of GR32. This allows them to operate on h registers on x86-64.
+def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
+ (outs GR32_NOREX:$dst), (ins GR8:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
+ []>, TB;
+let mayLoad = 1 in
+def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
+ (outs GR32_NOREX:$dst), (ins i8mem:$src),
+ "movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
+ []>, TB;
+
+let neverHasSideEffects = 1 in {
+ let Defs = [AX], Uses = [AL] in
+ def CBW : I<0x98, RawFrm, (outs), (ins),
+ "{cbtw|cbw}", []>, OpSize; // AX = signext(AL)
+ let Defs = [EAX], Uses = [AX] in
+ def CWDE : I<0x98, RawFrm, (outs), (ins),
+ "{cwtl|cwde}", []>; // EAX = signext(AX)
+
+ let Defs = [AX,DX], Uses = [AX] in
+ def CWD : I<0x99, RawFrm, (outs), (ins),
+ "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
+ let Defs = [EAX,EDX], Uses = [EAX] in
+ def CDQ : I<0x99, RawFrm, (outs), (ins),
+ "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
+}
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
+ "xor{b}\t$dst, $dst",
+ [(set GR8:$dst, 0)]>;
+// Use xorl instead of xorw: we don't care about the high 16 bits,
+// the encoding is smaller, and it avoids a partial-register update.
+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+ "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
+ [(set GR16:$dst, 0)]>;
+def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
+ "xor{l}\t$dst, $dst",
+ [(set GR32:$dst, 0)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Thread Local Storage Instructions
+//
+
+// All calls clobber the non-callee-saved registers. ESP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from being considered dead.
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP, EBX] in
+def TLS_addr32 : I<0, Pseudo, (outs), (ins i32imm:$sym),
+ "leal\t${sym:mem}(,%ebx,1), %eax; "
+ "call\t___tls_get_addr@PLT",
+ [(X86tlsaddr tglobaltlsaddr:$sym)]>,
+ Requires<[In32BitMode]>;
+
+let AddedComplexity = 5 in
+def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "movl\t%gs:$src, $dst",
+ [(set GR32:$dst, (gsload addr:$src))]>, SegGS;
+
+let AddedComplexity = 5 in
+def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "movl\t%fs:$src, $dst",
+ [(set GR32:$dst, (fsload addr:$src))]>, SegFS;
+
+//===----------------------------------------------------------------------===//
+// DWARF Pseudo Instructions
+//
+
+def DWARF_LOC : I<0, Pseudo, (outs),
+ (ins i32imm:$line, i32imm:$col, i32imm:$file),
+ ".loc\t${file:debug} ${line:debug} ${col:debug}",
+ [(dwarf_loc (i32 imm:$line), (i32 imm:$col),
+ (i32 imm:$file))]>;
+
+//===----------------------------------------------------------------------===//
+// EH Pseudo Instructions
+//
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1 in {
+def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
+ "ret\t#eh_return, addr: $addr",
+ [(X86ehret GR32:$addr)]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic support
+//
+
+// Atomic swap. These are just normal xchg instructions; since a memory
+// operand is referenced, the exchange is performed atomically.
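+// (xchg with a memory operand is implicitly locked on x86, so no explicit
+// LOCK prefix is needed here, unlike the cmpxchg/xadd forms below.)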
+let Constraints = "$val = $dst" in {
+def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+ "xchg{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
+def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
+ "xchg{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>,
+ OpSize;
+def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val),
+ "xchg{b}\t{$val, $ptr|$ptr, $val}",
+ [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>;
+}
+
+// Atomic compare and swap.
+let Defs = [EAX, EFLAGS], Uses = [EAX] in {
+def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
+ "lock\n\t"
+ "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+}
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr),
+ "lock\n\t"
+ "cmpxchg8b\t$ptr",
+ [(X86cas8 addr:$ptr)]>, TB, LOCK;
+}
+
+let Defs = [AX, EFLAGS], Uses = [AX] in {
+def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
+ "lock\n\t"
+ "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+}
+let Defs = [AL, EFLAGS], Uses = [AL] in {
+def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
+ "lock\n\t"
+ "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
+ [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+}
+
+// Atomic exchange and add
+let Constraints = "$val = $dst", Defs = [EFLAGS] in {
+def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+ "lock\n\t"
+ "xadd{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+ TB, LOCK;
+def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
+ "lock\n\t"
+ "xadd{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+ TB, OpSize, LOCK;
+def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val),
+ "lock\n\t"
+ "xadd{b}\t{$val, $ptr|$ptr, $val}",
+ [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+ TB, LOCK;
+}
+
+// Atomic exchange, and, or, xor
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+ usesCustomDAGSchedInserter = 1 in {
+def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
+def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
+def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMXOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
+def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMNAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
+def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
+ "#ATOMMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
+def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
+def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMUMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
+
+def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
+def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
+def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMXOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
+def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMNAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
+def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
+ "#ATOMMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
+def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
+
+def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
+def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
+def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMXOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
+def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMNAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
+}
+
+let Constraints = "$val1 = $dst1, $val2 = $dst2",
+ Defs = [EFLAGS, EAX, EBX, ECX, EDX],
+ Uses = [EAX, EBX, ECX, EDX],
+ mayLoad = 1, mayStore = 1,
+ usesCustomDAGSchedInserter = 1 in {
+def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMAND6432 PSEUDO!", []>;
+def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMOR6432 PSEUDO!", []>;
+def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMXOR6432 PSEUDO!", []>;
+def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMNAND6432 PSEUDO!", []>;
+def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMADD6432 PSEUDO!", []>;
+def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSUB6432 PSEUDO!", []>;
+def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSWAP6432 PSEUDO!", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
+def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
+def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
+def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+
+def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
+ (ADD32ri GR32:$src1, tconstpool:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
+ (ADD32ri GR32:$src1, tjumptable:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
+ (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
+ (ADD32ri GR32:$src1, texternalsym:$src2)>;
+
+def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+ (MOV32mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
+ (MOV32mi addr:$dst, texternalsym:$src)>;
+
+// Calls
+// tailcall stuff
+def : Pat<(X86tailcall GR32:$dst),
+ (TAILCALL)>;
+
+def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
+ (TAILCALL)>;
+def : Pat<(X86tailcall (i32 texternalsym:$dst)),
+ (TAILCALL)>;
+
+def : Pat<(X86tcret GR32:$dst, imm:$off),
+ (TCRETURNri GR32:$dst, imm:$off)>;
+
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
+ (TCRETURNdi tglobaladdr:$dst, imm:$off)>;
+
+def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
+ (TCRETURNdi texternalsym:$dst, imm:$off)>;
+
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+ (CALLpcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+ (CALLpcrel32 texternalsym:$dst)>;
+def : Pat<(X86call (i32 imm:$dst)),
+ (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
+
+// X86-specific add/sub patterns which produce a flag.
+def : Pat<(addc GR32:$src1, GR32:$src2),
+ (ADD32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(addc GR32:$src1, (load addr:$src2)),
+ (ADD32rm GR32:$src1, addr:$src2)>;
+def : Pat<(addc GR32:$src1, imm:$src2),
+ (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+def : Pat<(subc GR32:$src1, GR32:$src2),
+ (SUB32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(subc GR32:$src1, (load addr:$src2)),
+ (SUB32rm GR32:$src1, addr:$src2)>;
+def : Pat<(subc GR32:$src1, imm:$src2),
+ (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
+ (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
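+// (Roughly: "testl %eax, %eax" is 85 C0, 2 bytes, while "cmpl $0, %eax" is
+// 83 F8 00, 3 bytes; for a comparison against zero the condition codes the
+// compare feeds are equivalent.)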
+def : Pat<(parallel (X86cmp GR8:$src1, 0), (implicit EFLAGS)),
+ (TEST8rr GR8:$src1, GR8:$src1)>;
+def : Pat<(parallel (X86cmp GR16:$src1, 0), (implicit EFLAGS)),
+ (TEST16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(parallel (X86cmp GR32:$src1, 0), (implicit EFLAGS)),
+ (TEST32rr GR32:$src1, GR32:$src1)>;
+
+// Conditional moves with folded loads, with operands swapped and conditions
+// inverted. CMOVcc can only fold a load of its second (source) operand, so
+// when the load appears as the first operand the operands are commuted and
+// the condition is inverted.
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS),
+ (CMOVAE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS),
+ (CMOVAE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS),
+ (CMOVB16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS),
+ (CMOVB32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS),
+ (CMOVNE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS),
+ (CMOVNE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS),
+ (CMOVE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS),
+ (CMOVE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS),
+ (CMOVA16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS),
+ (CMOVA32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS),
+ (CMOVBE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS),
+ (CMOVBE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS),
+ (CMOVGE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS),
+ (CMOVGE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS),
+ (CMOVL16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS),
+ (CMOVL32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS),
+ (CMOVG16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS),
+ (CMOVG32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS),
+ (CMOVLE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS),
+ (CMOVLE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS),
+ (CMOVNP16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS),
+ (CMOVNP32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS),
+ (CMOVP16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS),
+ (CMOVP32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS),
+ (CMOVNS16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS),
+ (CMOVNS32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS),
+ (CMOVS16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS),
+ (CMOVS32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS),
+ (CMOVNO16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS),
+ (CMOVNO32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS),
+ (CMOVO16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS),
+ (CMOVO32rm GR32:$src2, addr:$src1)>;
+
+// zextload bool -> zextload byte
+def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+
+// extload bool -> extload byte
+def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>,
+ Requires<[In32BitMode]>;
+def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>,
+ Requires<[In32BitMode]>;
+def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
+
+// anyext
+def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext GR16:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
+
+// (and (i32 load), 255) -> (zextload i8); (and (i32 load), 65535) -> (zextload i16)
+def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))),
+ (MOVZX32rm8 addr:$src)>;
+def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 65535))),
+ (MOVZX32rm16 addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
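+// (Illustration: "addl $128, %eax" needs a 32-bit immediate, e.g.
+// 05 80 00 00 00, 5 bytes, while "subl $-128, %eax" uses the sign-extended
+// 8-bit immediate form 83 E8 80, 3 bytes.)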
+def : Pat<(add GR16:$src1, 128),
+ (SUB16ri8 GR16:$src1, -128)>;
+def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
+ (SUB16mi8 addr:$dst, -128)>;
+def : Pat<(add GR32:$src1, 128),
+ (SUB32ri8 GR32:$src1, -128)>;
+def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
+ (SUB32mi8 addr:$dst, -128)>;
+
+// r & (2^16-1) ==> movz
+def : Pat<(and GR32:$src1, 0xffff),
+ (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src1, GR32_ABCD),
+ x86_subreg_8bit))>,
+ Requires<[In32BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (MOVZX16rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD),
+ x86_subreg_8bit))>,
+ Requires<[In32BitMode]>;
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR32:$src, i16),
+ (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ x86_subreg_8bit))>,
+ Requires<[In32BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (MOVSX16rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit))>,
+ Requires<[In32BitMode]>;
+
+// trunc patterns
+def : Pat<(i16 (trunc GR32:$src)),
+ (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ x86_subreg_8bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit)>,
+ Requires<[In32BitMode]>;
+
+// h-register tricks
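+// Only AX/BX/CX/DX have addressable high-byte subregisters (AH/BH/CH/DH),
+// so the value is first copied into the ABCD register class and bits 15:8
+// are then read via the x86_subreg_8bit_hi subregister index.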
+def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ x86_subreg_8bit_hi)>,
+ Requires<[In32BitMode]>;
+def : Pat<(srl_su GR16:$src, (i8 8)),
+ (EXTRACT_SUBREG
+ (MOVZX32rr8
+ (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi)),
+ x86_subreg_16bit)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
+ (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In32BitMode]>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+ (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
+ x86_subreg_8bit_hi))>,
+ Requires<[In32BitMode]>;
+
+// (shl x, 1) ==> (add x, x)
+def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
+def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+
+// (shl x (and y, 31)) ==> (shl x, y)
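+// (Safe because 8/16/32-bit shifts mask the count in CL to its low 5 bits,
+// i.e. count mod 32, so the explicit 'and' is redundant.)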
+def : Pat<(shl GR8:$src1, (and CL:$amt, 31)),
+ (SHL8rCL GR8:$src1)>;
+def : Pat<(shl GR16:$src1, (and CL:$amt, 31)),
+ (SHL16rCL GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (and CL:$amt, 31)),
+ (SHL32rCL GR32:$src1)>;
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHL8mCL addr:$dst)>;
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHL16mCL addr:$dst)>;
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHL32mCL addr:$dst)>;
+
+def : Pat<(srl GR8:$src1, (and CL:$amt, 31)),
+ (SHR8rCL GR8:$src1)>;
+def : Pat<(srl GR16:$src1, (and CL:$amt, 31)),
+ (SHR16rCL GR16:$src1)>;
+def : Pat<(srl GR32:$src1, (and CL:$amt, 31)),
+ (SHR32rCL GR32:$src1)>;
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHR8mCL addr:$dst)>;
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHR16mCL addr:$dst)>;
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHR32mCL addr:$dst)>;
+
+def : Pat<(sra GR8:$src1, (and CL:$amt, 31)),
+ (SAR8rCL GR8:$src1)>;
+def : Pat<(sra GR16:$src1, (and CL:$amt, 31)),
+ (SAR16rCL GR16:$src1)>;
+def : Pat<(sra GR32:$src1, (and CL:$amt, 31)),
+ (SAR32rCL GR32:$src1)>;
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SAR8mCL addr:$dst)>;
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SAR16mCL addr:$dst)>;
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SAR32mCL addr:$dst)>;
+
+// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
+def : Pat<(or (srl GR32:$src1, CL:$amt),
+ (shl GR32:$src2, (sub 32, CL:$amt))),
+ (SHRD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt),
+ (shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
+ (SHRD32mrCL addr:$dst, GR32:$src2)>;
+
+def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))),
+ (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ (SHRD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
+ (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ addr:$dst),
+ (SHRD32mrCL addr:$dst, GR32:$src2)>;
+
+def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+ (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1),
+ GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
+
+// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
+def : Pat<(or (shl GR32:$src1, CL:$amt),
+ (srl GR32:$src2, (sub 32, CL:$amt))),
+ (SHLD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt),
+ (srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
+ (SHLD32mrCL addr:$dst, GR32:$src2)>;
+
+def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))),
+ (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ (SHLD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
+ (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ addr:$dst),
+ (SHLD32mrCL addr:$dst, GR32:$src2)>;
+
+def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+ (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1),
+ GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
+
+// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
+def : Pat<(or (srl GR16:$src1, CL:$amt),
+ (shl GR16:$src2, (sub 16, CL:$amt))),
+ (SHRD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt),
+ (shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
+ (SHRD16mrCL addr:$dst, GR16:$src2)>;
+
+def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))),
+ (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ (SHRD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
+ (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ addr:$dst),
+ (SHRD16mrCL addr:$dst, GR16:$src2)>;
+
+def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+ (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1),
+ GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
+
+// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
+def : Pat<(or (shl GR16:$src1, CL:$amt),
+ (srl GR16:$src2, (sub 16, CL:$amt))),
+ (SHLD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
+ (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
+ (SHLD16mrCL addr:$dst, GR16:$src2)>;
+
+def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))),
+ (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ (SHLD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
+ (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ addr:$dst),
+ (SHLD16mrCL addr:$dst, GR16:$src2)>;
+
+def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+ (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
+ GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
+
+//===----------------------------------------------------------------------===//
+// EFLAGS-defining Patterns
+//===----------------------------------------------------------------------===//
+
+// Register-Register Addition with EFLAGS result
+def : Pat<(parallel (X86add_flag GR8:$src1, GR8:$src2),
+ (implicit EFLAGS)),
+ (ADD8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(parallel (X86add_flag GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86add_flag GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (ADD32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory Addition with EFLAGS result
+def : Pat<(parallel (X86add_flag GR8:$src1, (loadi8 addr:$src2)),
+ (implicit EFLAGS)),
+ (ADD8rm GR8:$src1, addr:$src2)>;
+def : Pat<(parallel (X86add_flag GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)),
+ (ADD16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86add_flag GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)),
+ (ADD32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer Addition with EFLAGS result
+def : Pat<(parallel (X86add_flag GR8:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (ADD8ri GR8:$src1, imm:$src2)>;
+def : Pat<(parallel (X86add_flag GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86add_flag GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86add_flag GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86add_flag GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Register Addition with EFLAGS result
+def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), GR8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD8mr addr:$dst, GR8:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), GR16:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD16mr addr:$dst, GR16:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), GR32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD32mr addr:$dst, GR32:$src2)>;
+
+// Memory-Integer Addition with EFLAGS result
+def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD8mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD16mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD32mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD16mi8 addr:$dst, i16immSExt8:$src2)>;
+def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD32mi8 addr:$dst, i32immSExt8:$src2)>;
+
+// Register-Register Subtraction with EFLAGS result
+def : Pat<(parallel (X86sub_flag GR8:$src1, GR8:$src2),
+ (implicit EFLAGS)),
+ (SUB8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(parallel (X86sub_flag GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86sub_flag GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (SUB32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory Subtraction with EFLAGS result
+def : Pat<(parallel (X86sub_flag GR8:$src1, (loadi8 addr:$src2)),
+ (implicit EFLAGS)),
+ (SUB8rm GR8:$src1, addr:$src2)>;
+def : Pat<(parallel (X86sub_flag GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)),
+ (SUB16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86sub_flag GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)),
+ (SUB32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer Subtraction with EFLAGS result
+def : Pat<(parallel (X86sub_flag GR8:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(parallel (X86sub_flag GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86sub_flag GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86sub_flag GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86sub_flag GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Register Subtraction with EFLAGS result
+def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), GR8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB8mr addr:$dst, GR8:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), GR16:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB16mr addr:$dst, GR16:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), GR32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB32mr addr:$dst, GR32:$src2)>;
+
+// Memory-Integer Subtraction with EFLAGS result
+def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB8mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB16mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB32mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB16mi8 addr:$dst, i16immSExt8:$src2)>;
+def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB32mi8 addr:$dst, i32immSExt8:$src2)>;
+
+
+// Register-Register Signed Integer Multiply with EFLAGS result
+def : Pat<(parallel (X86smul_flag GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86smul_flag GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory Signed Integer Multiply with EFLAGS result
+def : Pat<(parallel (X86smul_flag GR16:$src1, (loadi16 addr:$src2)),
+ (implicit EFLAGS)),
+ (IMUL16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86smul_flag GR32:$src1, (loadi32 addr:$src2)),
+ (implicit EFLAGS)),
+ (IMUL32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer Signed Integer Multiply with EFLAGS result
+def : Pat<(parallel (X86smul_flag GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86smul_flag GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86smul_flag GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86smul_flag GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Integer Signed Integer Multiply with EFLAGS result
+def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), imm:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rmi addr:$src1, imm:$src2)>;
+def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), imm:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rmi addr:$src1, imm:$src2)>;
+def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
+
+// Optimize multiply by 2 with EFLAGS result.
+let AddedComplexity = 2 in {
+def : Pat<(parallel (X86smul_flag GR16:$src1, 2),
+ (implicit EFLAGS)),
+ (ADD16rr GR16:$src1, GR16:$src1)>;
+
+def : Pat<(parallel (X86smul_flag GR32:$src1, 2),
+ (implicit EFLAGS)),
+ (ADD32rr GR32:$src1, GR32:$src1)>;
+}
+
+// INC and DEC with EFLAGS result. Note that these do not set CF.
+def : Pat<(parallel (X86inc_flag GR8:$src), (implicit EFLAGS)),
+ (INC8r GR8:$src)>;
+def : Pat<(parallel (store (i8 (X86inc_flag (loadi8 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC8m addr:$dst)>;
+def : Pat<(parallel (X86dec_flag GR8:$src), (implicit EFLAGS)),
+ (DEC8r GR8:$src)>;
+def : Pat<(parallel (store (i8 (X86dec_flag (loadi8 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC8m addr:$dst)>;
+
+def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)),
+ (INC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC16m addr:$dst)>, Requires<[In32BitMode]>;
+def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)),
+ (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC16m addr:$dst)>, Requires<[In32BitMode]>;
+
+def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)),
+ (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (INC32m addr:$dst)>, Requires<[In32BitMode]>;
+def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)),
+ (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst),
+ (implicit EFLAGS)),
+ (DEC32m addr:$dst)>, Requires<[In32BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Stack Support
+//===----------------------------------------------------------------------===//
+
+include "X86InstrFPStack.td"
+
+//===----------------------------------------------------------------------===//
+// X86-64 Support
+//===----------------------------------------------------------------------===//
+
+include "X86Instr64bit.td"
+
+//===----------------------------------------------------------------------===//
+// XMM Floating point support (requires SSE / SSE2)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrSSE.td"
+
+//===----------------------------------------------------------------------===//
+// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrMMX.td"
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
new file mode 100644
index 000000000000..8f287e17dc3f
--- /dev/null
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -0,0 +1,694 @@
+//====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 MMX instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MMX Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>;
+
+def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>;
+def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
+def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>;
+def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>;
+
+//===----------------------------------------------------------------------===//
+// MMX Masks
+//===----------------------------------------------------------------------===//
+
+// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to
+// PSHUFW imm.
+def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShuffleSHUFImmediate(N));
+}]>;
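+// (Each 2-bit field of the immediate selects the source word for the
+// corresponding destination word; e.g. the identity shuffle <0,1,2,3> maps
+// to 0xE4 and the word reversal <3,2,1,0> maps to 0x1B.)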
+
+// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...>
+def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+// Patterns for: vector_shuffle v1, v2, <0, 4, 1, 5, ...>
+def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
+def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
+def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
+}], MMX_SHUFFLE_get_shuf_imm>;
+
+//===----------------------------------------------------------------------===//
+// MMX Multiclasses
+//===----------------------------------------------------------------------===//
+
+let isTwoAddress = 1 in {
+ // MMXI_binop_rm - Simple MMX binary operator.
+ multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Commutable = 0> {
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (OpVT (OpNode VR64:$src1,
+ (bitconvert
+ (load_mmx addr:$src2)))))]>;
+ }
+
+ multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ bit Commutable = 0> {
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+ }
+
+ // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64.
+ //
+ // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew
+ // to collapse (bitconvert VT to VT) into its operand.
+ //
+ multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Commutable = 0> {
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (OpNode VR64:$src1,(load_mmx addr:$src2)))]>;
+ }
+
+ multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, Intrinsic IntId,
+ Intrinsic IntId2> {
+ def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>;
+ def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+ def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
+ (ins VR64:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))]>;
+ }
+}
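+
+// For example, a single line such as
+//   defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8, 1>;
+// (used below) expands to a register-register def MMX_PADDBrr and a
+// register-memory def MMX_PADDBrm.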
+
+//===----------------------------------------------------------------------===//
+// MMX EMMS & FEMMS Instructions
+//===----------------------------------------------------------------------===//
+
+def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
+def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+
+//===----------------------------------------------------------------------===//
+// MMX Scalar Instructions
+//===----------------------------------------------------------------------===//
+
+// Data Transfer Instructions
+def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (v2i32 (scalar_to_vector GR32:$src)))]>;
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+let mayStore = 1 in
+def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ []>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVD64from64rr : MMXRI<0x7E, MRMSrcReg,
+ (outs GR64:$dst), (ins VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (load_mmx addr:$src))]>;
+def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (v1i64 VR64:$src), addr:$dst)]>;
+
+def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src),
+ "movdq2q\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v1i64 (bitconvert
+ (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))))]>;
+
+def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src),
+ "movq2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (movl immAllZerosV,
+ (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>;
+
+let neverHasSideEffects = 1 in
+def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src),
+ "movq2dq\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
+ "movntq\t{$src, $dst|$dst, $src}",
+ [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>;
+
+let AddedComplexity = 15 in
+// movd to MMX register zero-extends
+def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
+let AddedComplexity = 20 in
+def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v2i32 (X86vzmovl (v2i32
+ (scalar_to_vector (loadi32 addr:$src))))))]>;
+
+// Arithmetic Instructions
+
+// -- Addition
+defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8, 1>;
+defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>;
+defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>;
+defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>;
+
+defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>;
+defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>;
+
+defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>;
+defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>;
+
+// -- Subtraction
+defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>;
+defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>;
+defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>;
+defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>;
+
+defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>;
+defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>;
+
+defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>;
+defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>;
+
+// -- Multiplication
+defm MMX_PMULLW : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>;
+
+defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>;
+defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>;
+defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>;
+
+// -- Miscellanea
+defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
+
+defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>;
+defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>;
+
+defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b, 1>;
+defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w, 1>;
+
+defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b, 1>;
+defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>;
+
+defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>;
+
+// Logical Instructions
+defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>;
+defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>;
+defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>;
+
+let isTwoAddress = 1 in {
+ def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
+ VR64:$src2)))]>;
+ def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
+ (load addr:$src2))))]>;
+}
+
+// Shift Instructions
+defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_mmx_psrl_w, int_x86_mmx_psrli_w>;
+defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_mmx_psrl_d, int_x86_mmx_psrli_d>;
+defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_mmx_psrl_q, int_x86_mmx_psrli_q>;
+
+defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_mmx_psll_w, int_x86_mmx_pslli_w>;
+defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_mmx_psll_d, int_x86_mmx_pslli_d>;
+defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_mmx_psll_q, int_x86_mmx_pslli_q>;
+
+defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_mmx_psra_w, int_x86_mmx_psrai_w>;
+defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
+
+// Shift left / right and insert zeros.
+def : Pat<(v1i64 (X86vshl VR64:$src, (i8 imm:$amt))),
+ (v1i64 (MMX_PSLLQri VR64:$src, imm:$amt))>;
+def : Pat<(v1i64 (X86vshr VR64:$src, (i8 imm:$amt))),
+ (v1i64 (MMX_PSRLQri VR64:$src, imm:$amt))>;
+
+// Comparison Instructions
+defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
+defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>;
+defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d>;
+
+defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>;
+defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>;
+defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>;
+
+// Conversion Instructions
+
+// -- Unpack Instructions
+let isTwoAddress = 1 in {
+ // Unpack High Packed Data Instructions
+ def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "punpckhbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
+ def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "punpckhbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (mmx_unpckh VR64:$src1,
+ (bc_v8i8 (load_mmx addr:$src2)))))]>;
+
+ def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "punpckhwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
+ def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "punpckhwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (mmx_unpckh VR64:$src1,
+ (bc_v4i16 (load_mmx addr:$src2)))))]>;
+
+ def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "punpckhdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>;
+ def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "punpckhdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (mmx_unpckh VR64:$src1,
+ (bc_v2i32 (load_mmx addr:$src2)))))]>;
+
+ // Unpack Low Packed Data Instructions
+ def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "punpcklbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
+ def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "punpcklbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (mmx_unpckl VR64:$src1,
+ (bc_v8i8 (load_mmx addr:$src2)))))]>;
+
+ def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "punpcklwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
+ def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "punpcklwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (mmx_unpckl VR64:$src1,
+ (bc_v4i16 (load_mmx addr:$src2)))))]>;
+
+ def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, VR64:$src2),
+ "punpckldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>;
+ def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2),
+ "punpckldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (mmx_unpckl VR64:$src1,
+ (bc_v2i32 (load_mmx addr:$src2)))))]>;
+}
+
+// -- Pack Instructions
+defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>;
+defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>;
+defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>;
+
+// -- Shuffle Instructions
+def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2),
+ "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>;
+def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
+ (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2),
+ "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR64:$dst,
+ (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)),
+ (undef)))]>;
+
+// -- Conversion Instructions
+let neverHasSideEffects = 1 in {
+def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
+ "cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
+ "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
+ "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
+ "cvtps2pi\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
+ "cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
+ "cvttps2pi\t{$src, $dst|$dst, $src}", []>;
+} // end neverHasSideEffects
+
+
+// Extract / Insert
+def MMX_X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
+def MMX_X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;
+
+def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1),
+ (iPTR imm:$src2)))]>;
+let isTwoAddress = 1 in {
+ def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
+ (outs VR64:$dst), (ins VR64:$src1, GR32:$src2, i16i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
+ GR32:$src2, (iPTR imm:$src3))))]>;
+ def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem,
+ (outs VR64:$dst), (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst,
+ (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1),
+ (i32 (anyext (loadi16 addr:$src2))),
+ (iPTR imm:$src3))))]>;
+}
+
+// Mask creation
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_mmx_pmovmskb VR64:$src))]>;
+
+// Misc.
+let Uses = [EDI] in
+def MMX_MASKMOVQ : MMXI<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
+let Uses = [RDI] in
+def MMX_MASKMOVQ64: MMXI64<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map zero vector to pxor.
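+// pxor of a register with itself always produces zero, and pcmpeqd of a
+// register with itself produces all ones, so neither pattern needs a live
+// input and both defs can safely be rematerialized.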
+let isReMaterializable = 1 in {
+ def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins),
+ "pxor\t$dst, $dst",
+ [(set VR64:$dst, (v2i32 immAllZerosV))]>;
+ def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins),
+ "pcmpeqd\t$dst, $dst",
+ [(set VR64:$dst, (v2i32 immAllOnesV))]>;
+}
+
+let Predicates = [HasMMX] in {
+ def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>;
+ def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>;
+ def : Pat<(v8i8 immAllZerosV), (MMX_V_SET0)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Store 64-bit integer vector values.
+def : Pat<(store (v8i8 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+def : Pat<(store (v4i16 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+def : Pat<(store (v2i32 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+def : Pat<(store (v2f32 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+def : Pat<(store (v1i64 VR64:$src), addr:$dst),
+ (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
+
+// Bit convert.
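+// All of these reinterpretations stay in the same VR64 register, so each
+// pattern simply forwards the source operand and no instruction is emitted.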
+def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v2f32 VR64:$src))), (v8i8 VR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v2f32 VR64:$src))), (v4i16 VR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v2f32 VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v1i64 VR64:$src))), (v2f32 VR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v2i32 VR64:$src))), (v2f32 VR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v4i16 VR64:$src))), (v2f32 VR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v8i8 VR64:$src))), (v2f32 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v2f32 VR64:$src))), (v1i64 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>;
+
+// 64-bit bit convert.
+def : Pat<(v1i64 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(v2i32 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(v2f32 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(v4i16 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(v8i8 (bitconvert (i64 GR64:$src))),
+ (MMX_MOVD64to64rr GR64:$src)>;
+def : Pat<(i64 (bitconvert (v1i64 VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(i64 (bitconvert (v2i32 VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(i64 (bitconvert (v2f32 VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(i64 (bitconvert (v4i16 VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(i64 (bitconvert (v8i8 VR64:$src))),
+ (MMX_MOVD64from64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 VR64:$src))),
+ (MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (v2i32 VR64:$src))),
+ (MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
+ (MMX_MOVQ2FR64rr VR64:$src)>;
+def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
+ (MMX_MOVQ2FR64rr VR64:$src)>;
+
+// Move scalar to MMX register, zero-extended: a movd to an MMX register
+// zero-extends the upper bits.
+let AddedComplexity = 15 in {
+ def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
+ (MMX_MOVZDI2PDIrr GR32:$src)>;
+ def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))),
+ (MMX_MOVZDI2PDIrr GR32:$src)>;
+}
+
+let AddedComplexity = 20 in {
+ def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))),
+ (MMX_MOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))),
+ (MMX_MOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
+ (MMX_MOVZDI2PDIrm addr:$src)>;
+}
+
+// Clear top half.
+let AddedComplexity = 15 in {
+ def : Pat<(v8i8 (X86vzmovl VR64:$src)),
+ (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
+ def : Pat<(v4i16 (X86vzmovl VR64:$src)),
+ (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
+ def : Pat<(v2i32 (X86vzmovl VR64:$src)),
+ (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
+}
+
+// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
+// 8 or 16 bits matter.
+def : Pat<(bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
+ (MMX_MOVD64rr GR32:$src)>;
+def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
+ (MMX_MOVD64rr GR32:$src)>;
+
+// Patterns to perform canonical versions of vector shuffling.
+let AddedComplexity = 10 in {
+ def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
+ (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>;
+ def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))),
+ (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>;
+ def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))),
+ (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>;
+}
+
+let AddedComplexity = 10 in {
+ def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))),
+ (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>;
+ def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))),
+ (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>;
+ def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))),
+ (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
+}
+
+// Patterns to perform vector shuffling with a zeroed out vector.
+let AddedComplexity = 20 in {
+ def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV,
+ (v2i32 (scalar_to_vector (load_mmx addr:$src))))),
+ (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>;
+}
+
+// Some special case PANDN patterns.
+// FIXME: Get rid of these.
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
+ VR64:$src2)),
+ (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
+ VR64:$src2)),
+ (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
+ VR64:$src2)),
+ (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
+
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
+ (load addr:$src2))),
+ (MMX_PANDNrm VR64:$src1, addr:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
+ (load addr:$src2))),
+ (MMX_PANDNrm VR64:$src1, addr:$src2)>;
+def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
+ (load addr:$src2))),
+ (MMX_PANDNrm VR64:$src1, addr:$src2)>;
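+// These work because pandn computes (~$src1) & $src2, which is exactly the
+// (and (xor x, all-ones), y) form matched above.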
+
+// Move MMX to lower 64-bit of XMM
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v4i16 VR64:$src))))),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v2i32 VR64:$src))))),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v1i64 VR64:$src))))),
+ (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+
+// Move lower 64-bit of XMM to MMX.
+def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>;
+def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))))),
+ (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>;
+
+// CMOV* - Used to implement the SELECT DAG operation. Expanded by the
+// scheduler into a branch sequence.
+let Uses = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
+ def CMOV_V1I64 : I<0, Pseudo,
+ (outs VR64:$dst), (ins VR64:$t, VR64:$f, i8imm:$cond),
+ "#CMOV_V1I64 PSEUDO!",
+ [(set VR64:$dst,
+ (v1i64 (X86cmov VR64:$t, VR64:$f, imm:$cond,
+ EFLAGS)))]>;
+}
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
new file mode 100644
index 000000000000..1fafa46fa2d8
--- /dev/null
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -0,0 +1,3643 @@
+//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 SSE instruction set, defining the instructions,
+// and properties of the instructions which are needed for code generation,
+// machine code emission, and analysis.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// SSE specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
+ SDTCisFP<0>, SDTCisInt<2> ]>;
+def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
+ SDTCisFP<1>, SDTCisVT<3, i8>]>;
+
+def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
+def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
+def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
+def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
+def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
+def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
+def X86pshufb : SDNode<"X86ISD::PSHUFB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86pextrb : SDNode<"X86ISD::PEXTRB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pextrw : SDNode<"X86ISD::PEXTRW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pinsrb : SDNode<"X86ISD::PINSRB",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86pinsrw : SDNode<"X86ISD::PINSRW",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86insrtps : SDNode<"X86ISD::INSERTPS",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>;
+def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
+ SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad]>;
+def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
+def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
+def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
+def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
+def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
+def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
+def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
+def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
+
+//===----------------------------------------------------------------------===//
+// SSE Complex Patterns
+//===----------------------------------------------------------------------===//
+
+// These are 'extloads' from a scalar to the low element of a vector, zeroing
+// the top elements. These are used for the SSE 'ss' and 'sd' instruction
+// forms.
+def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad]>;
+def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad]>;
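+// This is what lets, for example, a scalar addss fold a 4-byte load from
+// memory even though the instruction's operands are full 128-bit vectors.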
+
+def ssmem : Operand<v4f32> {
+ let PrintMethod = "printf32mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+}
+def sdmem : Operand<v2f64> {
+ let PrintMethod = "printf64mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm);
+}
+
+//===----------------------------------------------------------------------===//
+// SSE pattern fragments
+//===----------------------------------------------------------------------===//
+
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+
+// Like 'store', but always requires vector alignment.
+def alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+// Like 'load', but always requires vector alignment.
+def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>;
+def alignedloadfsf64 : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>;
+def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>;
+def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>;
+def alignedloadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (alignedload node:$ptr))>;
+def alignedloadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (alignedload node:$ptr))>;
+
+// Like 'load', but uses special alignment checks suitable for use in
+// memory operands in most SSE instructions, which are required to
+// be naturally aligned on some targets but not on others.
+// FIXME: Actually implement support for targets that don't require the
+// alignment. This probably wants a subtarget predicate.
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
+def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
+def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
+def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
+def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
+def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
+
+// SSSE3 uses MMX registers for some instructions, and those memory operands
+// aren't guaranteed to be aligned on a 16-byte boundary.
+// FIXME: 8-byte alignment for MMX reads is not required.
+def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+
+def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>;
+def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
+def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
+
+def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
+def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
+def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
+def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
+def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
+def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
+
+def vzmovl_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
+def vzmovl_v4i32 : PatFrag<(ops node:$src),
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
+
+def vzload_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzload node:$src)))>;
+
+
+def fp32imm0 : PatLeaf<(f32 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def PSxLDQ_imm : SDNodeXForm<imm, [{
+ // Transformation function: imm >> 3
+ return getI32Imm(N->getZExtValue() >> 3);
+}]>;
+
+// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
+// SHUFP* etc. imm.
+def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShuffleSHUFImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
+// PSHUFHW imm.
+def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
+// PSHUFLW imm.
+def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
+}]>;
+
+def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
+}]>;
+
+def movddup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movlp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def shufp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshufhw_imm>;
+
+def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshuflw_imm>;
+
+//===----------------------------------------------------------------------===//
+// SSE scalar FP Instructions
+//===----------------------------------------------------------------------===//
+
+// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
+// scheduler into a branch sequence.
+let Uses = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
+ def CMOV_FR32 : I<0, Pseudo,
+ (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
+ "#CMOV_FR32 PSEUDO!",
+ [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_FR64 : I<0, Pseudo,
+ (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
+ "#CMOV_FR64 PSEUDO!",
+ [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
+ EFLAGS))]>;
+ def CMOV_V4F32 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V4F32 PSEUDO!",
+ [(set VR128:$dst,
+ (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2F64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2F64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V2I64 : I<0, Pseudo,
+ (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+ "#CMOV_V2I64 PSEUDO!",
+ [(set VR128:$dst,
+ (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+ EFLAGS)))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE1 Instructions
+//===----------------------------------------------------------------------===//
+
+// Move Instructions
+let neverHasSideEffects = 1 in
+def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (loadf32 addr:$src))]>;
+def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>;
+
+// Conversion instructions
+def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (fp_to_sint FR32:$src))]>;
+def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
+def CVTSI2SSrr : SSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "cvtsi2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp GR32:$src))]>;
+def CVTSI2SSrm : SSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "cvtsi2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
+
+// Match intrinsics which expect XMM operand(s).
+def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "cvtss2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>;
+def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
+ "cvtss2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse_cvtss2si
+ (load addr:$src)))]>;
+
+// Match intrinsics which expect MM and XMM operand(s).
+def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvtps2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
+def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
+ "cvtps2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvtps2pi
+ (load addr:$src)))]>;
+def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvttps2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>;
+def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
+ "cvttps2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvttps2pi
+ (load addr:$src)))]>;
+let Constraints = "$src1 = $dst" in {
+ def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR64:$src2),
+ "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
+ VR64:$src2))]>;
+ def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
+ "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
+ (load addr:$src2)))]>;
+}
+
+// Aliases for intrinsics
+def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_sse_cvttss2si VR128:$src))]>;
+def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+                                (int_x86_sse_cvttss2si (load addr:$src)))]>;
+
+let Constraints = "$src1 = $dst" in {
+ def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
+ "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
+ GR32:$src2))]>;
+ def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
+ "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
+ (loadi32 addr:$src2)))]>;
+}
+
+// Comparison instructions
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+ def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc),
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+ def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
+}
+
+let Defs = [EFLAGS] in {
+def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
+ "ucomiss\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp FR32:$src1, FR32:$src2), (implicit EFLAGS)]>;
+def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
+ "ucomiss\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp FR32:$src1, (loadf32 addr:$src2)),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Aliases to match intrinsics which expect XMM operand(s).
+let Constraints = "$src1 = $dst" in {
+ def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, SSECC:$cc),
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+let Defs = [EFLAGS] in {
+def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "ucomiss\t{$src2, $src1|$src1, $src2}",
+ [(X86ucomi (v4f32 VR128:$src1), VR128:$src2),
+ (implicit EFLAGS)]>;
+def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
+ "ucomiss\t{$src2, $src1|$src1, $src2}",
+ [(X86ucomi (v4f32 VR128:$src1), (load addr:$src2)),
+ (implicit EFLAGS)]>;
+
+def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "comiss\t{$src2, $src1|$src1, $src2}",
+ [(X86comi (v4f32 VR128:$src1), VR128:$src2),
+ (implicit EFLAGS)]>;
+def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "comiss\t{$src2, $src1|$src1, $src2}",
+ [(X86comi (v4f32 VR128:$src1), (load addr:$src2)),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Aliases of packed SSE1 instructions for scalar use. These all have names that
+// start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins),
+ "pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasSSE1]>, TB, OpSize;
+
+// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
+// disregarded.
+let neverHasSideEffects = 1 in
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+
+// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
+// disregarded.
+let canFoldAsLoad = 1 in
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let Constraints = "$src1 = $dst" in {
+let isCommutable = 1 in {
+ def FsANDPSrr : PSI<0x54, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ "andps\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
+ def FsORPSrr : PSI<0x56, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ "orps\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>;
+ def FsXORPSrr : PSI<0x57, MRMSrcReg, (outs FR32:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ "xorps\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
+}
+
+def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1, f128mem:$src2),
+ "andps\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fand FR32:$src1,
+ (memopfsf32 addr:$src2)))]>;
+def FsORPSrm : PSI<0x56, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1, f128mem:$src2),
+ "orps\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86for FR32:$src1,
+ (memopfsf32 addr:$src2)))]>;
+def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1, f128mem:$src2),
+ "xorps\t{$src2, $dst|$dst, $src2}",
+ [(set FR32:$dst, (X86fxor FR32:$src1,
+ (memopfsf32 addr:$src2)))]>;
+
+let neverHasSideEffects = 1 in {
+def FsANDNPSrr : PSI<0x55, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+ "andnps\t{$src2, $dst|$dst, $src2}", []>;
+let mayLoad = 1 in
+def FsANDNPSrm : PSI<0x55, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
+ "andnps\t{$src2, $dst|$dst, $src2}", []>;
+}
+}
+
+/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem, so there are a total of
+/// six "instructions".
+///
+let Constraints = "$src1 = $dst" in {
+multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int,
+ bit Commutable = 0> {
+ // Scalar operation, reg+reg.
+ def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, reg+mem.
+ def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
+
+ // Vector operation, reg+reg.
+ def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, reg+mem.
+ def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
+
+ // Intrinsic operation, reg+reg.
+ def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]>;
+
+ // Intrinsic operation, reg+mem.
+ def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F32Int VR128:$src1,
+ sse_load_f32:$src2))]>;
+}
+}
+
+// Arithmetic instructions
+defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd, int_x86_sse_add_ss, 1>;
+defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul, int_x86_sse_mul_ss, 1>;
+defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse_sub_ss>;
+defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse_div_ss>;
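+// defm prepends its name to each def in the multiclass, so e.g. ADD above
+// expands to ADDSSrr, ADDSSrm, ADDPSrr, ADDPSrm, ADDSSrr_Int and ADDSSrm_Int.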
+
+/// sse1_fp_binop_rm - Other SSE1 binops
+///
+/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of
+/// instructions for a full-vector intrinsic form. Operations that map
+/// onto C operators don't use this form since they just use the plain
+/// vector form instead of having a separate vector intrinsic form.
+///
+/// This provides a total of eight "instructions".
+///
+let Constraints = "$src1 = $dst" in {
+multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,
+ Intrinsic F32Int,
+ Intrinsic V4F32Int,
+ bit Commutable = 0> {
+
+ // Scalar operation, reg+reg.
+ def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, reg+mem.
+ def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
+ (ins FR32:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
+
+ // Vector operation, reg+reg.
+ def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, reg+mem.
+ def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
+
+ // Intrinsic operation, reg+reg.
+ def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, reg+mem.
+ def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F32Int VR128:$src1,
+ sse_load_f32:$src2))]>;
+
+ // Vector intrinsic operation, reg+reg.
+ def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, reg+mem.
+ def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src1, (memopv4f32 addr:$src2)))]>;
+}
+}
+
+defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax,
+ int_x86_sse_max_ss, int_x86_sse_max_ps>;
+defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
+ int_x86_sse_min_ss, int_x86_sse_min_ps>;
+
+//===----------------------------------------------------------------------===//
+// SSE packed FP Instructions
+
+// Move Instructions
+let neverHasSideEffects = 1 in
+def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;
+
+def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
+
+let neverHasSideEffects = 1 in
+def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1 in
+def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (loadv4f32 addr:$src))]>;
+def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)]>;
+
+// Intrinsic forms of MOVUPS load and store
+let canFoldAsLoad = 1 in
+def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
+def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
+
+let Constraints = "$src1 = $dst" in {
+ let AddedComplexity = 20 in {
+ def MOVLPSrm : PSI<0x12, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ "movlps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (movlp VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
+ def MOVHPSrm : PSI<0x16, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ "movhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (movhp VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
+ } // AddedComplexity
+} // Constraints = "$src1 = $dst"
+
+
+def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)]>;
+
+// v2f64 extract element 1 is always custom lowered to unpack high to low
+// and extract element 0, so the non-store version isn't too horrible.
+def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (undef)), (iPTR 0))), addr:$dst)]>;
+
+let Constraints = "$src1 = $dst" in {
+let AddedComplexity = 20 in {
+def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhp VR128:$src1, VR128:$src2)))]>;
+
+def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
+} // AddedComplexity
+} // Constraints = "$src1 = $dst"
+
+let AddedComplexity = 20 in {
+def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+}
+
+
+
+// Arithmetic
+
+/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a
+/// scalar) and leaves the top elements undefined.
+///
+/// And, we have a special variant form for a full-vector intrinsic form.
+///
+/// These four forms can each have a reg or a mem operand, so there are a
+/// total of eight "instructions".
+///
+multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,
+ Intrinsic F32Int,
+ Intrinsic V4F32Int,
+ bit Commutable = 0> {
+ // Scalar operation, reg.
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, mem.
+ def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))]>;
+
+ // Vector operation, reg.
+ def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, mem.
+ def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+
+ // Intrinsic operation, reg.
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, mem.
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+
+ // Vector intrinsic operation, reg
+ def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, mem
+ def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+}
+
+// Square root.
+defm SQRT : sse1_fp_unop_rm<0x51, "sqrt", fsqrt,
+ int_x86_sse_sqrt_ss, int_x86_sse_sqrt_ps>;
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt,
+ int_x86_sse_rsqrt_ss, int_x86_sse_rsqrt_ps>;
+defm RCP : sse1_fp_unop_rm<0x53, "rcp", X86frcp,
+ int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>;
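+// For the reciprocal, one Newton-Raphson step x1 = x0 * (2 - a * x0) applied
+// to the hardware estimate roughly doubles its ~12 bits of accuracy.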
+
+// Logical
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 1 in {
+ def ANDPSrr : PSI<0x54, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "andps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64
+ (and VR128:$src1, VR128:$src2)))]>;
+ def ORPSrr : PSI<0x56, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "orps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64
+ (or VR128:$src1, VR128:$src2)))]>;
+ def XORPSrr : PSI<0x57, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "xorps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64
+ (xor VR128:$src1, VR128:$src2)))]>;
+ }
+
+ def ANDPSrm : PSI<0x54, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "andps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (and (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>;
+ def ORPSrm : PSI<0x56, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "orps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (or (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>;
+ def XORPSrm : PSI<0x57, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "xorps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (xor (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>;
+ def ANDNPSrr : PSI<0x55, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "andnps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (and (xor VR128:$src1,
+ (bc_v2i64 (v4i32 immAllOnesV))),
+ VR128:$src2)))]>;
+ def ANDNPSrm : PSI<0x55, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
+ "andnps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
+ (bc_v2i64 (v4i32 immAllOnesV))),
+ (memopv2i64 addr:$src2))))]>;
+}
+
+let Constraints = "$src1 = $dst" in {
+ def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
+ "cmp${cc}ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
+ "cmp${cc}ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
+ (memop addr:$src), imm:$cc))]>;
+}
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPSrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+// Shuffle and unpack instructions
+let Constraints = "$src1 = $dst" in {
+ let isConvertibleToThreeAddress = 1 in // Convert to pshufd
+ def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1,
+ VR128:$src2, i8imm:$src3),
+ "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
+ def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,
+ f128mem:$src2, i8imm:$src3),
+ "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (v4f32 (shufp:$src3
+ VR128:$src1, (memopv4f32 addr:$src2))))]>;
+
+ let AddedComplexity = 10 in {
+ def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "unpckhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "unpckhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (unpckh VR128:$src1,
+ (memopv4f32 addr:$src2))))]>;
+
+ def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "unpcklps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "unpcklps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>;
+ } // AddedComplexity
+} // Constraints = "$src1 = $dst"
+
+// Mask creation
+def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
+def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
+
+// Prefetch intrinsic.
+def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
+def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
+def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
+def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
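+// The trailing (i32 N) operand is the llvm.prefetch locality hint: 3 (keep in
+// all cache levels) selects prefetcht0, decreasing down to 0 (non-temporal)
+// for prefetchnta.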
+
+// Non-temporal stores
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
+
+// Load, store, and memory fence
+def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>;
+
+// MXCSR register
+def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
+
+// Alias instructions that map zero vector to pxor / xorp* for sse.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in
+def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins),
+ "xorps\t$dst, $dst",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+
+let Predicates = [HasSSE1] in {
+ def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
+ def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
+ def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
+ def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
+ def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
+}
+
+// FR32 to 128-bit vector conversion.
+let isAsCheapAsAMove = 1 in
+def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4f32 (scalar_to_vector FR32:$src)))]>;
+def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
+
+// FIXME: may not be able to eliminate this movss with coalescing because the
+// src and dest register classes are different. We really want to write this
+// pattern like this:
+// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+// (f32 FR32:$src)>;
+let isAsCheapAsAMove = 1 in
+def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins VR128:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
+ (iPTR 0)))]>;
+def MOVPS2SSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store (f32 (vector_extract (v4f32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+
+// Move to the lower bits of a VR128, leaving the upper bits alone.
+// Three-operand (but two-address) aliases.
+let Constraints = "$src1 = $dst" in {
+let neverHasSideEffects = 1 in
+ def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
+ "movss\t{$src2, $dst|$dst, $src2}", []>;
+
+ let AddedComplexity = 15 in
+ def MOVLPSrr : SSI<0x10, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "movss\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movl VR128:$src1, VR128:$src2)))]>;
+}
+
+// Move to the lower bits of a VR128, zeroing the upper bits.
+// Loading from memory automatically zeroes the upper bits.
+let AddedComplexity = 20 in
+def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
+ (loadf32 addr:$src))))))]>;
+
+def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (MOVZSS2PSrm addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// SSE2 Instructions
+//===----------------------------------------------------------------------===//
+
+// Move Instructions
+let neverHasSideEffects = 1 in
+def MOVSDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (loadf64 addr:$src))]>;
+def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>;
+
+// Conversion instructions
+def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
+def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src),
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
+def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
+ "cvtsd2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround FR64:$src))]>;
+def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
+ "cvtsd2ss\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
+def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
+ "cvtsi2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
+def CVTSI2SDrm : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src),
+ "cvtsi2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
+
+// SSE2 instructions with XS prefix
+def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
+ "cvtss2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (fextend FR32:$src))]>, XS,
+ Requires<[HasSSE2]>;
+def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
+ "cvtss2sd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
+ Requires<[HasSSE2]>;
+
+// Match intrinsics which expect XMM operand(s).
+def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "cvtsd2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_cvtsd2si VR128:$src))]>;
+def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
+ "cvtsd2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_cvtsd2si
+ (load addr:$src)))]>;
+
+// Match intrinsics which expect MM and XMM operand(s).
+def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
+def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
+ "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvtpd2pi
+ (memop addr:$src)))]>;
+def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
+ "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>;
+def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
+ "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst, (int_x86_sse_cvttpd2pi
+ (memop addr:$src)))]>;
+def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
+ "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>;
+def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_cvtpi2pd
+ (load addr:$src)))]>;
+
+// Aliases for intrinsics
+def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (int_x86_sse2_cvttsd2si VR128:$src))]>;
+def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_cvttsd2si
+ (load addr:$src)))]>;
+
+// Comparison instructions
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+ def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc),
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
+let mayLoad = 1 in
+ def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
+}
+
+let Defs = [EFLAGS] in {
+def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2),
+ "ucomisd\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp FR64:$src1, FR64:$src2), (implicit EFLAGS)]>;
+def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
+ "ucomisd\t{$src2, $src1|$src1, $src2}",
+ [(X86cmp FR64:$src1, (loadf64 addr:$src2)),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Aliases to match intrinsics which expect XMM operand(s).
+let Constraints = "$src1 = $dst" in {
+ def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc),
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+let Defs = [EFLAGS] in {
+def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "ucomisd\t{$src2, $src1|$src1, $src2}",
+ [(X86ucomi (v2f64 VR128:$src1), (v2f64 VR128:$src2)),
+ (implicit EFLAGS)]>;
+def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
+ "ucomisd\t{$src2, $src1|$src1, $src2}",
+ [(X86ucomi (v2f64 VR128:$src1), (load addr:$src2)),
+ (implicit EFLAGS)]>;
+
+def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "comisd\t{$src2, $src1|$src1, $src2}",
+ [(X86comi (v2f64 VR128:$src1), (v2f64 VR128:$src2)),
+ (implicit EFLAGS)]>;
+def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "comisd\t{$src2, $src1|$src1, $src2}",
+ [(X86comi (v2f64 VR128:$src1), (load addr:$src2)),
+ (implicit EFLAGS)]>;
+} // Defs = [EFLAGS]
+
+// Aliases of packed SSE2 instructions for scalar use. These all have names that
+// start with 'Fs'.
+
+// Alias instruction that maps fld0 to pxor for SSE2.
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins),
+ "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+
+// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
+// disregarded.
+let neverHasSideEffects = 1 in
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+
+// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
+// disregarded.
+let canFoldAsLoad = 1 in
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let Constraints = "$src1 = $dst" in {
+let isCommutable = 1 in {
+ def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ "andpd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
+ def FsORPDrr : PDI<0x56, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ "orpd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>;
+ def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ "xorpd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
+}
+
+def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f128mem:$src2),
+ "andpd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fand FR64:$src1,
+ (memopfsf64 addr:$src2)))]>;
+def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f128mem:$src2),
+ "orpd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86for FR64:$src1,
+ (memopfsf64 addr:$src2)))]>;
+def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f128mem:$src2),
+ "xorpd\t{$src2, $dst|$dst, $src2}",
+ [(set FR64:$dst, (X86fxor FR64:$src1,
+ (memopfsf64 addr:$src2)))]>;
+
+let neverHasSideEffects = 1 in {
+def FsANDNPDrr : PDI<0x55, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ "andnpd\t{$src2, $dst|$dst, $src2}", []>;
+let mayLoad = 1 in
+def FsANDNPDrm : PDI<0x55, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
+ "andnpd\t{$src2, $dst|$dst, $src2}", []>;
+}
+}
+
+/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem, so there are a total of
+/// six "instructions".
+///
+let Constraints = "$src1 = $dst" in {
+multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int,
+ bit Commutable = 0> {
+ // Scalar operation, reg+reg.
+ def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, reg+mem.
+ def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
+
+ // Vector operation, reg+reg.
+ def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, reg+mem.
+ def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
+
+ // Intrinsic operation, reg+reg.
+ def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]>;
+
+ // Intrinsic operation, reg+mem.
+ def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F64Int VR128:$src1,
+ sse_load_f64:$src2))]>;
+}
+}
+
+// Arithmetic instructions
+defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>;
+defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>;
+defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>;
+defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>;
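+// Illustrative expansion (defm prefix concatenated with the def names in
+// basic_sse2_fp_binop_rm): ADD above should yield ADDSDrr/ADDSDrm (scalar),
+// ADDPDrr/ADDPDrm (packed) and ADDSDrr_Int/ADDSDrm_Int (scalar intrinsic),
+// i.e. the six forms described in the multiclass comment.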
+
+/// sse2_fp_binop_rm - Other SSE2 binops
+///
+/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of
+/// instructions for a full-vector intrinsic form. Operations that map
+/// onto C operators don't use this form since they just use the plain
+/// vector form instead of having a separate vector intrinsic form.
+///
+/// This provides a total of eight "instructions".
+///
+let Constraints = "$src1 = $dst" in {
+multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,
+ Intrinsic F64Int,
+ Intrinsic V2F64Int,
+ bit Commutable = 0> {
+
+ // Scalar operation, reg+reg.
+ def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, reg+mem.
+ def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
+
+ // Vector operation, reg+reg.
+ def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, reg+mem.
+ def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
+
+ // Intrinsic operation, reg+reg.
+ def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, reg+mem.
+ def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (F64Int VR128:$src1,
+ sse_load_f64:$src2))]>;
+
+ // Vector intrinsic operation, reg+reg.
+ def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, reg+mem.
+ def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src1,
+ (memopv2f64 addr:$src2)))]>;
+}
+}
+
+defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
+ int_x86_sse2_max_sd, int_x86_sse2_max_pd>;
+defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
+ int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
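+// Illustrative expansion: MAX above should yield MAXSDrr/MAXSDrm,
+// MAXPDrr/MAXPDrm, MAXSDrr_Int/MAXSDrm_Int and MAXPDrr_Int/MAXPDrm_Int,
+// i.e. the eight forms described in the sse2_fp_binop_rm comment.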
+
+//===----------------------------------------------------------------------===//
+// SSE packed FP Instructions
+
+// Move Instructions
+let neverHasSideEffects = 1 in
+def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
+
+def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
+
+let neverHasSideEffects = 1 in
+def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1 in
+def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (loadv2f64 addr:$src))]>;
+def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>;
+
+// Intrinsic forms of MOVUPD load and store
+def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
+def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
+
+let Constraints = "$src1 = $dst" in {
+ let AddedComplexity = 20 in {
+ def MOVLPDrm : PDI<0x12, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ "movlpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (movlp VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))))]>;
+ def MOVHPDrm : PDI<0x16, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ "movhpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (movhp VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))))]>;
+ } // AddedComplexity
+} // Constraints = "$src1 = $dst"
+
+def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+// Extracting element 1 of a v2f64 is always custom lowered to unpack high to
+// low and then extract element 0, so the non-store version isn't too horrible.
+def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>;
+
+// SSE2 instructions without OpSize prefix
+def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ TB, Requires<[HasSSE2]>;
+def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ TB, Requires<[HasSSE2]>;
+
+// SSE2 instructions with XS prefix
+def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ XS, Requires<[HasSSE2]>;
+def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
+def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+ (memop addr:$src)))]>;
+// SSE2 packed instructions with XS prefix
+def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))]>,
+ XS, Requires<[HasSSE2]>;
+def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memop addr:$src)))]>,
+ XS, Requires<[HasSSE2]>;
+
+// SSE2 packed instructions with XD prefix
+def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, Requires<[HasSSE2]>;
+def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, Requires<[HasSSE2]>;
+
+def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
+def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>;
+
+// SSE2 instructions without OpSize prefix
+def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ TB, Requires<[HasSSE2]>;
+def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+ (load addr:$src)))]>,
+ TB, Requires<[HasSSE2]>;
+
+def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+ (memop addr:$src)))]>;
+
+// Aliases to match intrinsics which expect XMM operand(s).
+let Constraints = "$src1 = $dst" in {
+def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
+ "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
+ GR32:$src2))]>;
+def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
+ "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
+ (loadi32 addr:$src2)))]>;
+def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
+ VR128:$src2))]>;
+def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
+ "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
+ (load addr:$src2)))]>;
+def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS,
+ Requires<[HasSSE2]>;
+def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS,
+ Requires<[HasSSE2]>;
+}
+
+// Arithmetic
+
+/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a
+/// scalar) and leaves the top elements undefined.
+///
+/// And, we have a special variant form for a full-vector intrinsic form.
+///
+/// These four forms can each have a reg or a mem operand, so there are a
+/// total of eight "instructions".
+///
+multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,
+ Intrinsic F64Int,
+ Intrinsic V2F64Int,
+ bit Commutable = 0> {
+ // Scalar operation, reg.
+ def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode FR64:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Scalar operation, mem.
+ def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set FR64:$dst, (OpNode (load addr:$src)))]>;
+
+ // Vector operation, reg.
+ def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector operation, mem.
+ def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Intrinsic operation, mem.
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+
+ // Vector intrinsic operation, reg
+ def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src))]> {
+ let isCommutable = Commutable;
+ }
+
+ // Vector intrinsic operation, mem
+ def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
+}
+
+// Square root.
+defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt,
+ int_x86_sse2_sqrt_sd, int_x86_sse2_sqrt_pd>;
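+// Illustrative expansion: SQRT above should yield SQRTSDr/SQRTSDm,
+// SQRTPDr/SQRTPDm, SQRTSDr_Int/SQRTSDm_Int and SQRTPDr_Int/SQRTPDm_Int,
+// i.e. the eight forms described in the sse2_fp_unop_rm comment.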
+
+// There are no f64 versions of the reciprocal approximation instructions.
+
+// Logical
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 1 in {
+ def ANDPDrr : PDI<0x54, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "andpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (and (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))]>;
+ def ORPDrr : PDI<0x56, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "orpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (or (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))]>;
+ def XORPDrr : PDI<0x57, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "xorpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (xor (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))]>;
+ }
+
+ def ANDPDrm : PDI<0x54, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "andpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (and (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>;
+ def ORPDrm : PDI<0x56, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "orpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (or (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>;
+ def XORPDrm : PDI<0x57, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "xorpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (xor (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>;
+ def ANDNPDrr : PDI<0x55, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "andnpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+ (bc_v2i64 (v2f64 VR128:$src2))))]>;
+ def ANDNPDrm : PDI<0x55, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
+ "andnpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+ (memopv2i64 addr:$src2)))]>;
+}
+
+let Constraints = "$src1 = $dst" in {
+ def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
+ "cmp${cc}pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
+ "cmp${cc}pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
+ (memop addr:$src), imm:$cc))]>;
+}
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+// Shuffle and unpack instructions
+let Constraints = "$src1 = $dst" in {
+ def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
+ def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,
+ f128mem:$src2, i8imm:$src3),
+ "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (v2f64 (shufp:$src3
+ VR128:$src1, (memopv2f64 addr:$src2))))]>;
+
+ let AddedComplexity = 10 in {
+ def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "unpckhpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "unpckhpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (unpckh VR128:$src1,
+ (memopv2f64 addr:$src2))))]>;
+
+ def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "unpcklpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "unpcklpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>;
+ } // AddedComplexity
+} // Constraints = "$src1 = $dst"
+
+
+//===----------------------------------------------------------------------===//
+// SSE integer instructions
+
+// Move Instructions
+let neverHasSideEffects = 1 in
+def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>;
+let canFoldAsLoad = 1, mayLoad = 1 in
+def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
+let mayStore = 1 in
+def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
+let canFoldAsLoad = 1, mayLoad = 1 in
+def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
+ XS, Requires<[HasSSE2]>;
+let mayStore = 1 in
+def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
+ XS, Requires<[HasSSE2]>;
+
+// Intrinsic forms of MOVDQU load and store
+let canFoldAsLoad = 1 in
+def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
+ XS, Requires<[HasSSE2]>;
+def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ XS, Requires<[HasSSE2]>;
+
+let Constraints = "$src1 = $dst" in {
+
+multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ bit Commutable = 0> {
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
+}
+
+multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr,
+ Intrinsic IntId, Intrinsic IntId2> {
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
+ def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
+}
+
+/// PDI_binop_rm - Simple SSE2 binary operator.
+multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Commutable = 0> {
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2)))))]>;
+}
+
+/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
+///
+/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
+/// to collapse (bitconvert VT to VT) into its operand.
+///
+multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Commutable = 0> {
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1,(memopv2i64 addr:$src2)))]>;
+}
+
+} // Constraints = "$src1 = $dst"
+
+// 128-bit Integer Arithmetic
+
+defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
+defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
+defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
+defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
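+// Illustrative expansion: each defm above should yield a reg+reg and a
+// reg+mem record, e.g. PADDB -> PADDBrr/PADDBrm; the intrinsic-based defms
+// below (e.g. PADDSB) expand the same way.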
+
+defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
+defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
+defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
+defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
+
+defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
+defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
+defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
+defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
+
+defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
+defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
+defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
+defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
+
+defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
+
+defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
+defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>;
+defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
+
+defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
+
+defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
+defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+
+
+defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
+defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
+defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
+defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
+defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+
+
+defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
+defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_sse2_psll_d, int_x86_sse2_pslli_d>;
+defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_sse2_psll_q, int_x86_sse2_pslli_q>;
+
+defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>;
+defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>;
+defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>;
+
+defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_sse2_psra_w, int_x86_sse2_psrai_w>;
+defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
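+// Illustrative expansion: each shift defm above should yield three records,
+// e.g. PSLLW -> PSLLWrr (count in an XMM register), PSLLWrm (count loaded
+// from memory) and PSLLWri (immediate count).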
+
+// 128-bit logical shifts.
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+ def PSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pslldq\t{$src2, $dst|$dst, $src2}", []>;
+ def PSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "psrldq\t{$src2, $dst|$dst, $src2}", []>;
+ // PSRADQri doesn't exist in SSE[1-3].
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (PSLLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (PSRLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+
+  // Shift up / down and insert zeros.
+ def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSLLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ (v2i64 (PSRLDQri VR128:$src, (PSxLDQ_imm imm:$amt)))>;
+}
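+// Note: PSLLDQri/PSRLDQri take their shift amount in bytes, while the
+// psll_dq/psrl_dq intrinsics and the X86vshl/X86vshr/X86fsrl nodes above
+// express it in bits; PSxLDQ_imm (defined elsewhere) is presumably the
+// SDNodeXForm that converts the bit count to a byte count, whereas the
+// "_bs" (byte shift) intrinsics pass their immediate through unchanged.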
+
+// Logical
+defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
+defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
+defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+
+let Constraints = "$src1 = $dst" in {
+ def PANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ VR128:$src2)))]>;
+
+ def PANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ (memopv2i64 addr:$src2))))]>;
+}
+
+// SSE2 Integer comparison
+defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>;
+defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>;
+defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>;
+defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
+defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
+defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+
+def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
+ (PCMPEQBrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
+ (PCMPEQBrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
+ (PCMPEQWrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
+ (PCMPEQWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
+ (PCMPEQDrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
+ (PCMPEQDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
+ (PCMPGTBrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
+ (PCMPGTBrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
+ (PCMPGTWrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
+ (PCMPGTWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
+ (PCMPGTDrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
+ (PCMPGTDrm VR128:$src1, addr:$src2)>;
+
+
+// Pack instructions
+defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
+defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
+defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+
+// Shuffle and unpack instructions
+def PSHUFDri : PDIi8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v4i32 (pshufd:$src2
+ VR128:$src1, (undef))))]>;
+def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v4i32 (pshufd:$src2
+ (bc_v4i32(memopv2i64 addr:$src1)),
+ (undef))))]>;
+
+// SSE2 with ImmT == Imm8 and XS prefix.
+def PSHUFHWri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1,
+ (undef))))]>,
+ XS, Requires<[HasSSE2]>;
+def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (pshufhw:$src2
+ (bc_v8i16 (memopv2i64 addr:$src1)),
+ (undef))))]>,
+ XS, Requires<[HasSSE2]>;
+
+// SSE2 with ImmT == Imm8 and XD prefix.
+def PSHUFLWri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1,
+ (undef))))]>,
+ XD, Requires<[HasSSE2]>;
+def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v8i16 (pshuflw:$src2
+ (bc_v8i16 (memopv2i64 addr:$src1)),
+ (undef))))]>,
+ XD, Requires<[HasSSE2]>;
+
+
+let Constraints = "$src1 = $dst" in {
+ def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpcklbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpcklbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckl VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2))))]>;
+ def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpcklwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpcklwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckl VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2))))]>;
+ def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpckldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpckldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckl VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2))))]>;
+ def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpcklqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpcklqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckl VR128:$src1,
+ (memopv2i64 addr:$src2))))]>;
+
+ def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpckhbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpckhbw\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckh VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2))))]>;
+ def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpckhwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpckhwd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckh VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2))))]>;
+ def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpckhdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpckhdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (unpckh VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2))))]>;
+ def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "punpckhqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
+ def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "punpckhqdq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2i64 (unpckh VR128:$src1,
+ (memopv2i64 addr:$src2))))]>;
+}
+
+// Extract / Insert
+def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>;
+let Constraints = "$src1 = $dst" in {
+ def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1,
+ GR32:$src2, i32i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+ def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,
+ i16mem:$src2, i32i8imm:$src3),
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
+ imm:$src3))]>;
+}
+
+// Mask creation
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
+
+// Conditional store
+let Uses = [EDI] in
+def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+
+let Uses = [RDI] in
+def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+
+// Non-temporal stores
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+ "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+// Load, store, and memory fence
+def LFENCE : I<0xAE, MRM5r, (outs), (ins),
+ "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM6r, (outs), (ins),
+ "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+
+// TODO: custom lower this so as to never even generate the noop
+def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+ (i8 0)), (NOOP)>;
+def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+ (i8 1)), (MFENCE)>;
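+// The membarrier operands above are, in order, the llvm.memory.barrier
+// flags (load-load, load-store, store-load, store-store, device); roughly:
+// no device ordering required -> NOOP, store-store only -> SFENCE,
+// load-load only -> LFENCE, any other combination with the device bit set
+// -> MFENCE.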
+
+// Alias instruction that maps the all-ones vector to pcmpeqd for SSE.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
+ "pcmpeqd\t$dst, $dst",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+
+// FR64 to 128-bit vector conversion.
+let isAsCheapAsAMove = 1 in
+def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (scalar_to_vector FR64:$src)))]>;
+def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
+
+def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))]>;
+def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+
+def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>;
+
+def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
+
+// SSE2 instructions with XS prefix
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ Requires<[HasSSE2]>;
+def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+// FIXME: may not be able to eliminate this movsd by coalescing because the
+// src and dest register classes are different. We really want to write this
+// pattern like this:
+// def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+//           (f64 FR64:$src)>;
+let isAsCheapAsAMove = 1 in
+def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
+ (iPTR 0)))]>;
+def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))]>;
+def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))]>;
+def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
+
+
+// Move to lower bits of a VR128, leaving upper bits alone.
+// Three operand (but two address) aliases.
+let Constraints = "$src1 = $dst" in {
+ let neverHasSideEffects = 1 in
+ def MOVLSD2PDrr : SDI<0x10, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
+ "movsd\t{$src2, $dst|$dst, $src2}", []>;
+
+ let AddedComplexity = 15 in
+ def MOVLPDrr : SDI<0x10, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "movsd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v2f64 (movl VR128:$src1, VR128:$src2)))]>;
+}
+
+// Store / copy lower 64-bits of a XMM register.
+def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
+
+// Move to the lower bits of a VR128, zeroing the upper bits.
+// Loading from memory automatically zeroes the upper bits.
+let AddedComplexity = 20 in {
+def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (X86vzmovl (v2f64 (scalar_to_vector
+ (loadf64 addr:$src))))))]>;
+
+def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (MOVZSD2PDrm addr:$src)>;
+def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (MOVZSD2PDrm addr:$src)>;
+def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>;
+}
+
+// movd / movq to XMM register zero-extends
+let AddedComplexity = 15 in {
+def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))]>;
+// This is X86-64 only.
+def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))]>;
+}
+
+let AddedComplexity = 20 in {
+def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))]>;
+
+def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
+ (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+
+def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))]>, XS,
+ Requires<[HasSSE2]>;
+
+def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (MOVZQI2PQIrm addr:$src)>;
+def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ (MOVZQI2PQIrm addr:$src)>;
+def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
+}
+
+// Move from XMM to XMM and clear the upper 64 bits. Note: there is a bug in
+// the IA-32 documentation; movq xmm1, xmm2 does clear the high bits.
+let AddedComplexity = 15 in
+def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+let AddedComplexity = 20 in {
+def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))]>,
+ XS, Requires<[HasSSE2]>;
+
+def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
+ (MOVZPQILo2PQIrm addr:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE3 Instructions
+//===----------------------------------------------------------------------===//
+
+// Move Instructions
+def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movshdup\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4f32 (movshdup
+ VR128:$src, (undef))))]>;
+def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movshdup\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (movshdup
+ (memopv4f32 addr:$src), (undef)))]>;
+
+def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movsldup\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4f32 (movsldup
+ VR128:$src, (undef))))]>;
+def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movsldup\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (movsldup
+ (memopv4f32 addr:$src), (undef)))]>;
+
+def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movddup\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
+def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "movddup\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
+ (undef))))]>;
+
+def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
+ (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+let AddedComplexity = 5 in {
+def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(movddup (memopv2i64 addr:$src), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
+ (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+}
+
+// Arithmetic
+let Constraints = "$src1 = $dst" in {
+ def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "addsubps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
+ VR128:$src2))]>;
+ def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "addsubps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
+ (memop addr:$src2)))]>;
+ def ADDSUBPDrr : S3I<0xD0, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "addsubpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
+ VR128:$src2))]>;
+ def ADDSUBPDrm : S3I<0xD0, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ "addsubpd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
+ (memop addr:$src2)))]>;
+}
+
+def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "lddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
+
+// Horizontal ops
+class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+ : S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
+class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+ : S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>;
+class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+ : S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
+class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+ : S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
+
+let Constraints = "$src1 = $dst" in {
+ def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
+ def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
+ def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
+ def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
+ def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
+ def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
+ def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
+ def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
+}
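+// For reference, these horizontal ops work pairwise within each source,
+// e.g. haddps should compute
+//   dst = [src1[0]+src1[1], src1[2]+src1[3], src2[0]+src2[1], src2[2]+src2[3]]
+// with haddpd/hsubps/hsubpd following the same layout (two doubles per
+// register for the pd forms, subtraction for the hsub forms).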
+
+// Thread synchronization
+def MONITOR : I<0x01, MRM1r, (outs), (ins), "monitor",
+ [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
+def MWAIT : I<0x01, MRM1r, (outs), (ins), "mwait",
+ [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
+
+// vector_shuffle v1, <undef> <1, 1, 3, 3>
+let AddedComplexity = 15 in
+def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
+ (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+// vector_shuffle v1, <undef> <0, 0, 2, 2>
+let AddedComplexity = 15 in
+ def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
+ (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+ def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+//===----------------------------------------------------------------------===//
+// SSSE3 Instructions
+//===----------------------------------------------------------------------===//
+
+/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
+multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, Intrinsic IntId128> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst,
+ (IntId64 (bitconvert (memopv8i8 addr:$src))))]>;
+
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>,
+ OpSize;
+
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv16i8 addr:$src))))]>, OpSize;
+}
+
+/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16.
+multiclass SS3I_unop_rm_int_16<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, Intrinsic IntId128> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst,
+ (IntId64
+ (bitconvert (memopv4i16 addr:$src))))]>;
+
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>,
+ OpSize;
+
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+}
+
+/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32.
+multiclass SS3I_unop_rm_int_32<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, Intrinsic IntId128> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst, (IntId64 VR64:$src))]>;
+
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR64:$dst,
+ (IntId64
+ (bitconvert (memopv2i32 addr:$src))))]>;
+
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>,
+ OpSize;
+
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv4i32 addr:$src))))]>, OpSize;
+}
+
+defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb",
+ int_x86_ssse3_pabs_b,
+ int_x86_ssse3_pabs_b_128>;
+defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw",
+ int_x86_ssse3_pabs_w,
+ int_x86_ssse3_pabs_w_128>;
+defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd",
+ int_x86_ssse3_pabs_d,
+ int_x86_ssse3_pabs_d_128>;
+
+/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8.
+let Constraints = "$src1 = $dst" in {
+ multiclass SS3I_binop_rm_int_8<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, Intrinsic IntId128,
+ bit Commutable = 0> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopv8i8 addr:$src2))))]>;
+
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ }
+}
+
+/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16.
+let Constraints = "$src1 = $dst" in {
+ multiclass SS3I_binop_rm_int_16<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, Intrinsic IntId128,
+ bit Commutable = 0> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopv4i16 addr:$src2))))]>;
+
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv8i16 addr:$src2))))]>, OpSize;
+ }
+}
+
+/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32.
+let Constraints = "$src1 = $dst" in {
+ multiclass SS3I_binop_rm_int_32<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId64, Intrinsic IntId128,
+ bit Commutable = 0> {
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
+ let isCommutable = Commutable;
+ }
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopv2i32 addr:$src2))))]>;
+
+ def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv4i32 addr:$src2))))]>, OpSize;
+ }
+}
+
+defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw",
+ int_x86_ssse3_phadd_w,
+ int_x86_ssse3_phadd_w_128>;
+defm PHADDD : SS3I_binop_rm_int_32<0x02, "phaddd",
+ int_x86_ssse3_phadd_d,
+ int_x86_ssse3_phadd_d_128>;
+defm PHADDSW : SS3I_binop_rm_int_16<0x03, "phaddsw",
+ int_x86_ssse3_phadd_sw,
+ int_x86_ssse3_phadd_sw_128>;
+defm PHSUBW : SS3I_binop_rm_int_16<0x05, "phsubw",
+ int_x86_ssse3_phsub_w,
+ int_x86_ssse3_phsub_w_128>;
+defm PHSUBD : SS3I_binop_rm_int_32<0x06, "phsubd",
+ int_x86_ssse3_phsub_d,
+ int_x86_ssse3_phsub_d_128>;
+defm PHSUBSW : SS3I_binop_rm_int_16<0x07, "phsubsw",
+ int_x86_ssse3_phsub_sw,
+ int_x86_ssse3_phsub_sw_128>;
+defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw,
+ int_x86_ssse3_pmadd_ub_sw_128>;
+defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
+ int_x86_ssse3_pmul_hr_sw,
+ int_x86_ssse3_pmul_hr_sw_128, 1>;
+defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb",
+ int_x86_ssse3_pshuf_b,
+ int_x86_ssse3_pshuf_b_128>;
+defm PSIGNB : SS3I_binop_rm_int_8 <0x08, "psignb",
+ int_x86_ssse3_psign_b,
+ int_x86_ssse3_psign_b_128>;
+defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw",
+ int_x86_ssse3_psign_w,
+ int_x86_ssse3_psign_w_128>;
+defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd",
+ int_x86_ssse3_psign_d,
+ int_x86_ssse3_psign_d_128>;
+
+let Constraints = "$src1 = $dst" in {
+ def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2, i16imm:$src3),
+ "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst,
+ (int_x86_ssse3_palign_r
+ VR64:$src1, VR64:$src2,
+ imm:$src3))]>;
+ def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2, i16imm:$src3),
+ "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR64:$dst,
+ (int_x86_ssse3_palign_r
+ VR64:$src1,
+ (bitconvert (memopv2i32 addr:$src2)),
+ imm:$src3))]>;
+
+ def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32imm:$src3),
+ "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_ssse3_palign_r_128
+ VR128:$src1, VR128:$src2,
+ imm:$src3))]>, OpSize;
+ def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i32imm:$src3),
+ "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_ssse3_palign_r_128
+ VR128:$src1,
+ (bitconvert (memopv4i32 addr:$src2)),
+ imm:$src3))]>, OpSize;
+}
+
+def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+ (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
+def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+ (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// extload f32 -> f64. This matches load+fextend because we have a hack in
+// the isel (PreprocessForFPConvert) that can introduce loads after dag combine.
+// Since these loads aren't folded into the fextend, we have to match it
+// explicitly here.
+let Predicates = [HasSSE2] in
+ def : Pat<(fextend (loadf32 addr:$src)),
+ (CVTSS2SDrm addr:$src)>;
+
+// bit_convert
+let Predicates = [HasSSE2] in {
+ def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+}
+
+// Move a scalar into the low element of an XMM register, zeroing the upper
+// elements (movd to an XMM register zero-extends).
+let AddedComplexity = 15 in {
+// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>;
+def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
+def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
+}
+
+// Splat v2f64 / v2i64
+let AddedComplexity = 10 in {
+def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
+ (UNPCKLPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(unpckh (v2f64 VR128:$src), (undef)),
+ (UNPCKHPDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
+ (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(unpckh (v2i64 VR128:$src), (undef)),
+ (PUNPCKHQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+}
+
+// Special unary SHUFPSrri case.
+def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPSrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE1]>;
+let AddedComplexity = 5 in
+def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
+ (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[HasSSE2]>;
+// Special unary SHUFPDrri case (v2i64).
+def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+// Special unary SHUFPDrri case (v2f64).
+def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
+ (SHUFPDrri VR128:$src1, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+// Unary v4f32 shuffle with PSHUF* in order to fold a load.
+def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
+ (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[HasSSE2]>;
+
+// Special binary v4i32 shuffle cases with SHUFPS.
+def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
+ (SHUFPSrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (SHUFPSrmi VR128:$src1, addr:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+// Special binary v2i64 shuffle cases using SHUFPDrri.
+def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
+ (SHUFPDrri VR128:$src1, VR128:$src2,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>,
+ Requires<[HasSSE2]>;
+
+// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
+let AddedComplexity = 15 in {
+def : Pat<(v4i32 (unpckl_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+}
+let AddedComplexity = 10 in {
+def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))),
+ (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))),
+ (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))),
+ (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))),
+ (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+}
+
+// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
+let AddedComplexity = 15 in {
+def : Pat<(v4i32 (unpckh_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))),
+ (PSHUFDri VR128:$src, (SHUFFLE_get_shuf_imm VR128:$src2))>,
+ Requires<[OptForSpeed, HasSSE2]>;
+}
+let AddedComplexity = 10 in {
+def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))),
+ (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))),
+ (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))),
+ (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
+ (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+}
+
+let AddedComplexity = 20 in {
+// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
+def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+
+// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
+def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+// vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
+def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
+ (MOVHLPSrr VR128:$src1, VR128:$src1)>;
+}
+
+let AddedComplexity = 20 in {
+// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
+// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
+def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+
+def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+}
+
+// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
+// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS
+def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+
+def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
+ addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
+ addr:$src1),
+ (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
+ (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+
+
+let AddedComplexity = 15 in {
+// Setting the lowest element in the vector.
+def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
+ (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
+ (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+
+// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
+def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
+ (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+}
+
+// Set lowest element and zero upper elements.
+let AddedComplexity = 15 in
+def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)),
+ (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
+
+// Some special case pandn patterns.
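+// These match the andnot idiom (and (xor X, all-ones), Y), where the all-ones
+// constant may be bitcast from any integer element width, and select PANDN.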
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
+ VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
+ VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
+ VR128:$src2)),
+ (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
+ (memop addr:$src2))),
+ (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
+ (memop addr:$src2))),
+ (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
+ (memop addr:$src2))),
+ (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+
+// vector -> vector casts
+def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))),
+ (Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>;
+def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
+ (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>;
+
+// Use movaps / movups for SSE integer load / store (one byte shorter).
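+// (MOVDQA/MOVDQU carry a 66/F3 prefix byte that MOVAPS/MOVUPS do not.)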
+def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>, Requires<[HasSSE1]>;
+def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>, Requires<[HasSSE1]>;
+def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>, Requires<[HasSSE2]>;
+def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>, Requires<[HasSSE2]>;
+
+def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
+ string OpcodeStr,
+ Intrinsic V4F32Int,
+ Intrinsic V2F64Int> {
+ // Vector intrinsic operation, reg
+ def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PSm_Int : SS4AIi8<opcps, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, reg
+ def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
+ OpSize;
+}
+
+let Constraints = "$src1 = $dst" in {
+multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int> {
+ // Intrinsic operation, reg.
+ def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ OpSize;
+}
+}
+
+// FP round - roundss, roundps, roundsd, roundpd
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
+ int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
+defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
+ int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
+
+// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
+multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128> {
+ def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
+ def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+}
+
+defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
+ int_x86_sse41_phminposuw>;
+
+/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
+let Constraints = "$src1 = $dst" in {
+ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Commutable = 0> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ }
+}
+
+defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq",
+ int_x86_sse41_pcmpeqq, 1>;
+defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw",
+ int_x86_sse41_packusdw, 0>;
+defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb",
+ int_x86_sse41_pminsb, 1>;
+defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd",
+ int_x86_sse41_pminsd, 1>;
+defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud",
+ int_x86_sse41_pminud, 1>;
+defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw",
+ int_x86_sse41_pminuw, 1>;
+defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb",
+ int_x86_sse41_pmaxsb, 1>;
+defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd",
+ int_x86_sse41_pmaxsd, 1>;
+defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
+ int_x86_sse41_pmaxud, 1>;
+defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
+ int_x86_sse41_pmaxuw, 1>;
+
+defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>;
+
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+ (PCMPEQQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+ (PCMPEQQrm VR128:$src1, addr:$src2)>;
+
+/// SS41I_binop_patint - SSE 4.1 binary op with pattern and intrinsic forms
+let Constraints = "$src1 = $dst" in {
+ multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
+ SDNode OpNode, Intrinsic IntId128,
+ bit Commutable = 0> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode (OpVT VR128:$src1),
+ VR128:$src2))]>, OpSize {
+ let isCommutable = Commutable;
+ }
+ def rr_int : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (OpNode VR128:$src1, (memop addr:$src2)))]>, OpSize;
+ def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1, (memop addr:$src2)))]>,
+ OpSize;
+ }
+}
+defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
+ int_x86_sse41_pmulld, 1>;
+
+/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
+let Constraints = "$src1 = $dst" in {
+ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Commutable = 0> {
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
+ OpSize;
+ }
+}
+
+defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps",
+ int_x86_sse41_blendps, 0>;
+defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd",
+ int_x86_sse41_blendpd, 0>;
+defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw",
+ int_x86_sse41_pblendw, 0>;
+defm DPPS : SS41I_binop_rmi_int<0x40, "dpps",
+ int_x86_sse41_dpps, 1>;
+defm DPPD : SS41I_binop_rmi_int<0x41, "dppd",
+ int_x86_sse41_dppd, 1>;
+defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw",
+ int_x86_sse41_mpsadbw, 1>;
+
+
+/// SS41I_ternary_int - SSE 4.1 ternary operator
+let Uses = [XMM0], Constraints = "$src1 = $dst" in {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr,
+ "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
+ OpSize;
+
+ def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+ }
+}
+
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+
+
+multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
+ OpSize;
+}
+
+defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
+defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
+defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
+defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
+defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
+defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
+
+// Common patterns involving scalar load.
+def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
+
+
+multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
+ OpSize;
+}
+
+defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
+defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
+defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
+defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
+
+// Common patterns involving scalar load
+def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>;
+
+
+multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ // Expecting an i16 load any-extended to an i32 value.
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId (bitconvert
+ (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
+ OpSize;
+}
+
+defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
+defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
+
+// Common patterns involving scalar load
+def : Pat<(int_x86_sse41_pmovsxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>;
+
+def : Pat<(int_x86_sse41_pmovzxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>;
+
+
+/// SS41I_extract8 - SSE 4.1 extract 8 bits to a 32-bit reg or 8-bit mem
+multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
+ OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
+}
+
+defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
+
+
+/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
+multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
+}
+
+defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
+
+
+/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
+multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize;
+}
+
+defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
+
+
+/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
+/// destination
+multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
+ OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
+ addr:$dst)]>, OpSize;
+}
+
+defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
+
+// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
+def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
+ imm:$src2))),
+ addr:$dst),
+ (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasSSE41]>;
+
+let Constraints = "$src1 = $dst" in {
+ multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
+ imm:$src3))]>, OpSize;
+ }
+}
+
+defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
+
+let Constraints = "$src1 = $dst" in {
+ multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
+ imm:$src3)))]>, OpSize;
+ }
+}
+
+defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+
+let Constraints = "$src1 = $dst" in {
+ multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, FR32:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1, FR32:$src2, imm:$src3))]>, OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1, (loadf32 addr:$src2),
+ imm:$src3))]>, OpSize;
+ }
+}
+
+defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+
+let Defs = [EFLAGS] in {
+def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize;
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize;
+}
+
+def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
+
+/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
+let Constraints = "$src1 = $dst" in {
+ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Commutable = 0> {
+ def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ }
+}
+
+defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
+
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
+ (PCMPGTQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
+ (PCMPGTQrm VR128:$src1, addr:$src2)>;
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
new file mode 100644
index 000000000000..f92310607a8e
--- /dev/null
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -0,0 +1,560 @@
+//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the X86 target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "X86JITInfo.h"
+#include "X86Relocations.h"
+#include "X86Subtarget.h"
+#include "llvm/Function.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/Support/Compiler.h"
+#include <cstdlib>
+#include <cstring>
+using namespace llvm;
+
+// Determine the platform we're running on
+#if defined (__x86_64__) || defined (_M_AMD64)
+# define X86_64_JIT
+#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
+# define X86_32_JIT
+#endif
+
+void X86JITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ unsigned char *OldByte = (unsigned char *)Old;
+ *OldByte++ = 0xE9; // Emit JMP opcode.
+ unsigned *OldWord = (unsigned *)OldByte;
+ unsigned NewAddr = (intptr_t)New;
+ unsigned OldAddr = (intptr_t)OldWord;
+ *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code.
+}
+
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// Check if building with -fPIC
+#if defined(__PIC__) && __PIC__ && defined(__linux__)
+#define ASMCALLSUFFIX "@PLT"
+#else
+#define ASMCALLSUFFIX
+#endif
+
+// For ELF targets, use .size and .type directives to let tools
+// know the extent of functions defined in assembler.
+#if defined(__ELF__)
+# define SIZE(sym) ".size " #sym ", . - " #sym "\n"
+# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n"
+#else
+# define SIZE(sym)
+# define TYPE_FUNCTION(sym)
+#endif
+
+// Provide a convenient way for disabling usage of CFI directives.
+// This is needed for old/broken assemblers (for example, gas on
+// Darwin is pretty old and doesn't support these directives)
+#if defined(__APPLE__)
+# define CFI(x)
+#else
+// FIXME: Disable this until we really want to use it. Also, we will
+// need to add some workarounds for compilers, which support
+// only subset of these directives.
+# define CFI(x)
+#endif
+
+// Provide a wrapper for X86CompilationCallback2 that saves non-traditional
+// callee saved registers, for the fastcc calling convention.
+extern "C" {
+#if defined(X86_64_JIT)
+# ifndef _MSC_VER
+ // No need to save EAX/EDX for X86-64.
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ TYPE_FUNCTION(X86CompilationCallback)
+ ASMPREFIX "X86CompilationCallback:\n"
+ CFI(".cfi_startproc\n")
+ // Save RBP
+ "pushq %rbp\n"
+ CFI(".cfi_def_cfa_offset 16\n")
+ CFI(".cfi_offset %rbp, -16\n")
+ // Save RSP
+ "movq %rsp, %rbp\n"
+ CFI(".cfi_def_cfa_register %rbp\n")
+ // Save all int arg registers
+ "pushq %rdi\n"
+ CFI(".cfi_rel_offset %rdi, 0\n")
+ "pushq %rsi\n"
+ CFI(".cfi_rel_offset %rsi, 8\n")
+ "pushq %rdx\n"
+ CFI(".cfi_rel_offset %rdx, 16\n")
+ "pushq %rcx\n"
+ CFI(".cfi_rel_offset %rcx, 24\n")
+ "pushq %r8\n"
+ CFI(".cfi_rel_offset %r8, 32\n")
+ "pushq %r9\n"
+ CFI(".cfi_rel_offset %r9, 40\n")
+ // Align the stack on a 16-byte boundary. RSP might only be 8-byte aligned
+ // if this is called from an indirect stub.
+ "andq $-16, %rsp\n"
+ // Save all XMM arg registers
+ "subq $128, %rsp\n"
+ "movaps %xmm0, (%rsp)\n"
+ "movaps %xmm1, 16(%rsp)\n"
+ "movaps %xmm2, 32(%rsp)\n"
+ "movaps %xmm3, 48(%rsp)\n"
+ "movaps %xmm4, 64(%rsp)\n"
+ "movaps %xmm5, 80(%rsp)\n"
+ "movaps %xmm6, 96(%rsp)\n"
+ "movaps %xmm7, 112(%rsp)\n"
+ // JIT callee
+ "movq %rbp, %rdi\n" // Pass prev frame and return address
+ "movq 8(%rbp), %rsi\n"
+ "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
+ // Restore all XMM arg registers
+ "movaps 112(%rsp), %xmm7\n"
+ "movaps 96(%rsp), %xmm6\n"
+ "movaps 80(%rsp), %xmm5\n"
+ "movaps 64(%rsp), %xmm4\n"
+ "movaps 48(%rsp), %xmm3\n"
+ "movaps 32(%rsp), %xmm2\n"
+ "movaps 16(%rsp), %xmm1\n"
+ "movaps (%rsp), %xmm0\n"
+ // Restore RSP
+ "movq %rbp, %rsp\n"
+ CFI(".cfi_def_cfa_register %rsp\n")
+ // Restore all int arg registers
+ "subq $48, %rsp\n"
+ CFI(".cfi_adjust_cfa_offset 48\n")
+ "popq %r9\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %r9\n")
+ "popq %r8\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %r8\n")
+ "popq %rcx\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rcx\n")
+ "popq %rdx\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rdx\n")
+ "popq %rsi\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rsi\n")
+ "popq %rdi\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rdi\n")
+ // Restore RBP
+ "popq %rbp\n"
+ CFI(".cfi_adjust_cfa_offset -8\n")
+ CFI(".cfi_restore %rbp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback)
+ );
+# else
+ // No inline assembler support on this platform. The routine is in an
+ // external file.
+ void X86CompilationCallback();
+
+# endif
+#elif defined (X86_32_JIT)
+# ifndef _MSC_VER
+ void X86CompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback\n"
+ TYPE_FUNCTION(X86CompilationCallback)
+ ASMPREFIX "X86CompilationCallback:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 8\n")
+ CFI(".cfi_offset %ebp, -8\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+# if defined(__APPLE__)
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+# endif
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register %esp\n")
+ "subl $12, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 12\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback)
+ );
+
+ // Same as X86CompilationCallback but also saves XMM argument registers.
+ void X86CompilationCallback_SSE(void);
+ asm(
+ ".text\n"
+ ".align 8\n"
+ ".globl " ASMPREFIX "X86CompilationCallback_SSE\n"
+ TYPE_FUNCTION(X86CompilationCallback_SSE)
+ ASMPREFIX "X86CompilationCallback_SSE:\n"
+ CFI(".cfi_startproc\n")
+ "pushl %ebp\n"
+ CFI(".cfi_def_cfa_offset 8\n")
+ CFI(".cfi_offset %ebp, -8\n")
+ "movl %esp, %ebp\n" // Standard prologue
+ CFI(".cfi_def_cfa_register %ebp\n")
+ "pushl %eax\n"
+ CFI(".cfi_rel_offset %eax, 0\n")
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ CFI(".cfi_rel_offset %edx, 4\n")
+ "pushl %ecx\n"
+ CFI(".cfi_rel_offset %ecx, 8\n")
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ // FIXME: provide frame move information for xmm registers.
+ // This can be tricky, because CFA register is ebp (unaligned)
+ // and we need to produce offsets relative to it.
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ CFI(".cfi_restore %xmm3\n")
+ "movaps 32(%esp), %xmm2\n"
+ CFI(".cfi_restore %xmm2\n")
+ "movaps 16(%esp), %xmm1\n"
+ CFI(".cfi_restore %xmm1\n")
+ "movaps (%esp), %xmm0\n"
+ CFI(".cfi_restore %xmm0\n")
+ "movl %ebp, %esp\n" // Restore ESP
+ CFI(".cfi_def_cfa_register esp\n")
+ "subl $12, %esp\n"
+ CFI(".cfi_adjust_cfa_offset 12\n")
+ "popl %ecx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ecx\n")
+ "popl %edx\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %edx\n")
+ "popl %eax\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %eax\n")
+ "popl %ebp\n"
+ CFI(".cfi_adjust_cfa_offset -4\n")
+ CFI(".cfi_restore %ebp\n")
+ "ret\n"
+ CFI(".cfi_endproc\n")
+ SIZE(X86CompilationCallback_SSE)
+ );
+# else
+ void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
+
+ _declspec(naked) void X86CompilationCallback(void) {
+ __asm {
+ push ebp
+ mov ebp, esp
+ push eax
+ push edx
+ push ecx
+ and esp, -16
+ mov eax, dword ptr [ebp+4]
+ mov dword ptr [esp+4], eax
+ mov dword ptr [esp], ebp
+ call X86CompilationCallback2
+ mov esp, ebp
+ sub esp, 12
+ pop ecx
+ pop edx
+ pop eax
+ pop ebp
+ ret
+ }
+ }
+
+# endif // _MSC_VER
+
+#else // Not an i386 host
+ void X86CompilationCallback() {
+ assert(0 && "Cannot call X86CompilationCallback() on a non-x86 arch!\n");
+ abort();
+ }
+#endif
+}
+
+/// X86CompilationCallback2 - This is the target-specific function invoked by the
+/// function stub when we did not know the real target of a call. This function
+/// must locate the start of the stub or call site and pass it into the JIT
+/// compiler function.
+extern "C" void ATTRIBUTE_USED
+X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
+ intptr_t *RetAddrLoc = &StackPtr[1];
+ assert(*RetAddrLoc == RetAddr &&
+ "Could not find return address on the stack!");
+
+ // It's a stub if there is an interrupt marker after the call.
+ bool isStub = ((unsigned char*)RetAddr)[0] == 0xCD;
+
+ // The call instruction should have pushed the return address onto the stack...
+#if defined (X86_64_JIT)
+ RetAddr--; // Backtrack to the reference itself...
+#else
+ RetAddr -= 4; // Backtrack to the reference itself...
+#endif
+
+#if 0
+ DOUT << "In callback! Addr=" << (void*)RetAddr
+ << " ESP=" << (void*)StackPtr
+ << ": Resolving call to function: "
+ << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n";
+#endif
+
+ // Sanity check to make sure this really is a call instruction.
+#if defined (X86_64_JIT)
+ assert(((unsigned char*)RetAddr)[-2] == 0x41 && "Not a call instr!");
+ assert(((unsigned char*)RetAddr)[-1] == 0xFF && "Not a call instr!");
+#else
+ assert(((unsigned char*)RetAddr)[-1] == 0xE8 && "Not a call instr!");
+#endif
+
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
+
+ // Rewrite the call target... so that we don't end up here every time we
+ // execute the call.
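+ // On x86-64 the call site is "movabsq $Target, %r10; callq *%r10" (see the
+ // asserts above); RetAddr-0xa points at the 64-bit immediate of the movabsq,
+ // so storing NewVal there retargets the call.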
+#if defined (X86_64_JIT)
+ if (!isStub)
+ *(intptr_t *)(RetAddr - 0xa) = NewVal;
+#else
+ *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
+#endif
+
+ if (isStub) {
+ // If this is a stub, rewrite the call into an unconditional branch
+ // instruction so that two return addresses are not pushed onto the stack
+ // when the requested function finally gets called. This also makes the
+ // 0xCD byte (interrupt) dead, so the marker doesn't affect anything.
+#if defined (X86_64_JIT)
+ // If the target address is within 32-bit range of the stub, use a
+ // PC-relative branch instead of loading the actual address. (This is
+ // considerably shorter than the 64-bit immediate load already there.)
+ // We assume here intptr_t is 64 bits.
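+ // The E9 rel32 jump is written at RetAddr-0xc, so its displacement is
+ // measured from the end of that 5-byte instruction (RetAddr-0x7); hence
+ // diff = NewVal - RetAddr + 7.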
+ intptr_t diff = NewVal-RetAddr+7;
+ if (diff >= -2147483648LL && diff <= 2147483647LL) {
+ *(unsigned char*)(RetAddr-0xc) = 0xE9;
+ *(intptr_t *)(RetAddr-0xb) = diff & 0xffffffff;
+ } else {
+ *(intptr_t *)(RetAddr - 0xa) = NewVal;
+ ((unsigned char*)RetAddr)[0] = (2 | (4 << 3) | (3 << 6));
+ }
+#else
+ ((unsigned char*)RetAddr)[-1] = 0xE9;
+#endif
+ }
+
+ // Change the return address to reexecute the call instruction...
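+ // (0xd backs up over the 13-byte movabsq+callq sequence on x86-64; 5 backs
+ // up over the 5-byte call instruction on x86-32.)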
+#if defined (X86_64_JIT)
+ *RetAddrLoc -= 0xd;
+#else
+ *RetAddrLoc -= 5;
+#endif
+}
+
+TargetJITInfo::LazyResolverFn
+X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+
+#if defined (X86_32_JIT) && !defined (_MSC_VER)
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ if (!X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) {
+ // FIXME: support for AMD family of processors.
+ if (memcmp(text.c, "GenuineIntel", 12) == 0) {
+ X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
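+ // Bit 25 of EDX from CPUID leaf 1 indicates SSE support.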
+ if ((EDX >> 25) & 0x1)
+ return X86CompilationCallback_SSE;
+ }
+ }
+#endif
+
+ return X86CompilationCallback;
+}
+
+void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE) {
+#if defined (X86_64_JIT)
+ JCE.startGVStub(GV, 8, 8);
+ JCE.emitWordLE((unsigned)(intptr_t)ptr);
+ JCE.emitWordLE((unsigned)(((intptr_t)ptr) >> 32));
+#else
+ JCE.startGVStub(GV, 4, 4);
+ JCE.emitWordLE((intptr_t)ptr);
+#endif
+ return JCE.finishGVStub(GV);
+}
+
+void *X86JITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ // Note, we cast to intptr_t here to silence a -pedantic warning that
+ // complains about casting a function pointer to a normal pointer.
+#if defined (X86_32_JIT) && !defined (_MSC_VER)
+ bool NotCC = (Fn != (void*)(intptr_t)X86CompilationCallback &&
+ Fn != (void*)(intptr_t)X86CompilationCallback_SSE);
+#else
+ bool NotCC = Fn != (void*)(intptr_t)X86CompilationCallback;
+#endif
+ if (NotCC) {
+#if defined (X86_64_JIT)
+ JCE.startGVStub(F, 13, 4);
+ JCE.emitByte(0x49); // REX prefix
+ JCE.emitByte(0xB8+2); // movabsq r10
+ JCE.emitWordLE((unsigned)(intptr_t)Fn);
+ JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32));
+ JCE.emitByte(0x41); // REX prefix
+ JCE.emitByte(0xFF); // jmpq *r10
+ JCE.emitByte(2 | (4 << 3) | (3 << 6));
+#else
+ JCE.startGVStub(F, 5, 4);
+ JCE.emitByte(0xE9);
+ JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
+#endif
+ return JCE.finishGVStub(F);
+ }
+
+#if defined (X86_64_JIT)
+ JCE.startGVStub(F, 14, 4);
+ JCE.emitByte(0x49); // REX prefix
+ JCE.emitByte(0xB8+2); // movabsq r10
+ JCE.emitWordLE((unsigned)(intptr_t)Fn);
+ JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32));
+ JCE.emitByte(0x41); // REX prefix
+ JCE.emitByte(0xFF); // callq *r10
+ JCE.emitByte(2 | (2 << 3) | (3 << 6));
+#else
+ JCE.startGVStub(F, 6, 4);
+ JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination...
+
+ JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
+#endif
+
+ JCE.emitByte(0xCD); // Interrupt - Just a marker identifying the stub!
+ return JCE.finishGVStub(F);
+}
+
+void X86JITInfo::emitFunctionStubAtAddr(const Function* F, void *Fn, void *Stub,
+ JITCodeEmitter &JCE) {
+ // Note, we cast to intptr_t here to silence a -pedantic warning that
+ // complains about casting a function pointer to a normal pointer.
+ JCE.startGVStub(F, Stub, 5);
+ JCE.emitByte(0xE9);
+#if defined (X86_64_JIT)
+ assert(((((intptr_t)Fn-JCE.getCurrentPCValue()-5) << 32) >> 32) ==
+ ((intptr_t)Fn-JCE.getCurrentPCValue()-5)
+ && "PIC displacement does not fit in displacement field!");
+#endif
+ JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4);
+ JCE.finishGVStub(F);
+}
+
+/// getPICJumpTableEntry - Returns the value of the jumptable entry for the
+/// specified basic block.
+uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) {
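+ // 64-bit jump-table entries are relative to the entry itself; 32-bit PIC
+ // entries are relative to the PIC base register.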
+#if defined(X86_64_JIT)
+ return BB - Entry;
+#else
+ return BB - PICBase;
+#endif
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((X86::RelocationType)MR->getRelocationType()) {
+ case X86::reloc_pcrel_word: {
+ // PC relative relocation, add the relocated value to the value already in
+ // memory, after we adjust it for where the PC is.
+ ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal();
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_picrel_word: {
+ // PIC base relative relocation, add the relocated value to the value
+ // already in memory, after we adjust it for where the PIC base is.
+ ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_absolute_word:
+ // Absolute relocation, just add the relocated value to the value already
+ // in memory.
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ case X86::reloc_absolute_dword:
+ *((intptr_t*)RelocPos) += ResultPtr;
+ break;
+ }
+ }
+}
+
+char* X86JITInfo::allocateThreadLocalMemory(size_t size) {
+#if defined(X86_32_JIT) && !defined(__APPLE__) && !defined(_MSC_VER)
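+ // On 32-bit ELF-style targets the static TLS area sits below the thread
+ // pointer, so new storage is carved off by decrementing the running offset.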
+ TLSOffset -= size;
+ return TLSOffset;
+#else
+ assert(0 && "Cannot allocate thread local storage on this arch!\n");
+ return 0;
+#endif
+}
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
new file mode 100644
index 000000000000..6a4e2148a5aa
--- /dev/null
+++ b/lib/Target/X86/X86JITInfo.h
@@ -0,0 +1,84 @@
+//===- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86JITINFO_H
+#define X86JITINFO_H
+
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class X86TargetMachine;
+
+ class X86JITInfo : public TargetJITInfo {
+ X86TargetMachine &TM;
+ uintptr_t PICBase;
+ char* TLSOffset;
+ public:
+ explicit X86JITInfo(X86TargetMachine &tm) : TM(tm) {
+ useGOT = 0;
+ TLSOffset = 0;
+ }
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+ /// to emit an indirect symbol which contains the address of the specified
+ /// ptr.
+ virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+ JITCodeEmitter &JCE);
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// emitFunctionStubAtAddr - Use the specified JITCodeEmitter object to
+ /// emit a small native function that simply calls Fn. Emit the stub into
+ /// the supplied buffer.
+ virtual void emitFunctionStubAtAddr(const Function* F, void *Fn,
+ void *Buffer, JITCodeEmitter &JCE);
+
+ /// getPICJumpTableEntry - Returns the value of the jumptable entry for the
+ /// specified basic block.
+ virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ /// allocateThreadLocalMemory - Each target has its own way of
+ /// handling thread local variables. This method returns a value only
+ /// meaningful to the target.
+ virtual char* allocateThreadLocalMemory(size_t size);
+
+ /// setPICBase / getPICBase - Getter / setter of PICBase, used to compute
+ /// PIC jumptable entry.
+ void setPICBase(uintptr_t Base) { PICBase = Base; }
+ uintptr_t getPICBase() const { return PICBase; }
+ };
+}
+
+#endif
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
new file mode 100644
index 000000000000..8a5ac2c9a85c
--- /dev/null
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -0,0 +1,112 @@
+//====- X86MachineFunctionInfo.h - X86 machine function info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares X86-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MACHINEFUNCTIONINFO_H
+#define X86MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+enum NameDecorationStyle {
+ None,
+ StdCall,
+ FastCall
+};
+
+/// X86MachineFunctionInfo - This class is derived from MachineFunction and
+/// contains private X86 target-specific information for each MachineFunction.
+class X86MachineFunctionInfo : public MachineFunctionInfo {
+ /// ForceFramePointer - True if the function is required to use a frame
+ /// pointer for reasons other than containing dynamic allocas or having
+ /// frame-pointer elimination turned off. For example, the Cygwin main
+ /// function contains stack pointer re-alignment code, which requires FP.
+ bool ForceFramePointer;
+
+ /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+ /// BytesToPopOnReturn - Number of bytes the function pops on return.
+ /// Used on Windows for stdcall & fastcall name decoration.
+ unsigned BytesToPopOnReturn;
+
+ /// DecorationStyle - If the function requires additional name decoration,
+ /// DecorationStyle holds the right way to do so.
+ NameDecorationStyle DecorationStyle;
+
+ /// ReturnAddrIndex - FrameIndex for return slot.
+ int ReturnAddrIndex;
+
+ /// TailCallReturnAddrDelta - The delta by which the return-address stack
+ /// slot is moved. Used to create an area before the register spill area
+ /// on the stack to which the return address can be safely moved.
+ int TailCallReturnAddrDelta;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - Keeps track of the virtual register initialized for
+ /// use as the global base register. This is used in some PIC relocation
+ /// models.
+ unsigned GlobalBaseReg;
+
+public:
+ X86MachineFunctionInfo() : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ DecorationStyle(None),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+ GlobalBaseReg(0) {}
+
+ X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ DecorationStyle(None),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+ GlobalBaseReg(0) {}
+
+ bool getForceFramePointer() const { return ForceFramePointer;}
+ void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
+ void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;}
+
+ NameDecorationStyle getDecorationStyle() const { return DecorationStyle; }
+ void setDecorationStyle(NameDecorationStyle style) { DecorationStyle = style;}
+
+ int getRAIndex() const { return ReturnAddrIndex; }
+ void setRAIndex(int Index) { ReturnAddrIndex = Index; }
+
+ int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; }
+ void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
new file mode 100644
index 000000000000..5af1fb17818d
--- /dev/null
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -0,0 +1,1280 @@
+//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetRegisterInfo class.
+// This file is responsible for the frame pointer elimination optimization
+// on X86.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86RegisterInfo.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
+ const TargetInstrInfo &tii)
+ : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
+ X86::ADJCALLSTACKDOWN64 :
+ X86::ADJCALLSTACKDOWN32,
+ tm.getSubtarget<X86Subtarget>().is64Bit() ?
+ X86::ADJCALLSTACKUP64 :
+ X86::ADJCALLSTACKUP32),
+ TM(tm), TII(tii) {
+ // Cache some information.
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ Is64Bit = Subtarget->is64Bit();
+ IsWin64 = Subtarget->isTargetWin64();
+ StackAlign = TM.getFrameInfo()->getStackAlignment();
+ if (Is64Bit) {
+ SlotSize = 8;
+ StackPtr = X86::RSP;
+ FramePtr = X86::RBP;
+ } else {
+ SlotSize = 4;
+ StackPtr = X86::ESP;
+ FramePtr = X86::EBP;
+ }
+}
+
+// getDwarfRegNum - This function maps LLVM register identifiers to the
+// Dwarf specific numbering, used in debug info and exception tables.
+
+int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ unsigned Flavour = DWARFFlavour::X86_64;
+ if (!Subtarget->is64Bit()) {
+ if (Subtarget->isTargetDarwin()) {
+ if (isEH)
+ Flavour = DWARFFlavour::X86_32_DarwinEH;
+ else
+ Flavour = DWARFFlavour::X86_32_Generic;
+ } else if (Subtarget->isTargetCygMing()) {
+ // Not supported yet; fall back to the generic flavour.
+ Flavour = DWARFFlavour::X86_32_Generic;
+ } else {
+ Flavour = DWARFFlavour::X86_32_Generic;
+ }
+ }
+
+ return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
+}
+
+// getX86RegNum - This function maps LLVM register identifiers to their X86
+// specific numbering, which is used in various places encoding instructions.
+//
+unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
+ switch(RegNo) {
+ case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
+ case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
+ case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
+ case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
+ case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
+ return N86::ESP;
+ case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
+ return N86::EBP;
+ case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
+ return N86::ESI;
+ case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
+ return N86::EDI;
+
+ case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
+ return N86::EAX;
+ case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
+ return N86::ECX;
+ case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+ return N86::EDX;
+ case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+ return N86::EBX;
+ case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+ return N86::ESP;
+ case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+ return N86::EBP;
+ case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+ return N86::ESI;
+ case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+ return N86::EDI;
+
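+ // The x87 stack registers are numbered contiguously, so the hardware
+ // number is just the offset from ST0.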
+ case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
+ case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
+ return RegNo-X86::ST0;
+
+ case X86::XMM0: case X86::XMM8: case X86::MM0:
+ return 0;
+ case X86::XMM1: case X86::XMM9: case X86::MM1:
+ return 1;
+ case X86::XMM2: case X86::XMM10: case X86::MM2:
+ return 2;
+ case X86::XMM3: case X86::XMM11: case X86::MM3:
+ return 3;
+ case X86::XMM4: case X86::XMM12: case X86::MM4:
+ return 4;
+ case X86::XMM5: case X86::XMM13: case X86::MM5:
+ return 5;
+ case X86::XMM6: case X86::XMM14: case X86::MM6:
+ return 6;
+ case X86::XMM7: case X86::XMM15: case X86::MM7:
+ return 7;
+
+ default:
+ assert(isVirtualRegister(RegNo) && "Unknown physical register!");
+ assert(0 && "Register allocator hasn't allocated reg correctly yet!");
+ return 0;
+ }
+}
+
+const TargetRegisterClass *X86RegisterInfo::getPointerRegClass() const {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+ if (Subtarget->is64Bit())
+ return &X86::GR64RegClass;
+ else
+ return &X86::GR32RegClass;
+}
+
+const TargetRegisterClass *
+X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &X86::CCRRegClass) {
+ if (Is64Bit)
+ return &X86::GR64RegClass;
+ else
+ return &X86::GR32RegClass;
+ }
+ return NULL;
+}
+
+const unsigned *
+X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ bool callsEHReturn = false;
+
+ if (MF) {
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ callsEHReturn = (MMI ? MMI->callsEHReturn() : false);
+ }
+
+ static const unsigned CalleeSavedRegs32Bit[] = {
+ X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs32EHRet[] = {
+ X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs64Bit[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+
+ static const unsigned CalleeSavedRegs64EHRet[] = {
+ X86::RAX, X86::RDX, X86::RBX, X86::R12,
+ X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+
+ static const unsigned CalleeSavedRegsWin64[] = {
+ X86::RBX, X86::RBP, X86::RDI, X86::RSI,
+ X86::R12, X86::R13, X86::R14, X86::R15,
+ X86::XMM6, X86::XMM7, X86::XMM8, X86::XMM9,
+ X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13,
+ X86::XMM14, X86::XMM15, 0
+ };
+
+ if (Is64Bit) {
+ if (IsWin64)
+ return CalleeSavedRegsWin64;
+ else
+ return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit);
+ } else {
+ return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit);
+ }
+}
+
+const TargetRegisterClass* const*
+X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ bool callsEHReturn = false;
+
+ if (MF) {
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ callsEHReturn = (MMI ? MMI->callsEHReturn() : false);
+ }
+
+ static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = {
+ &X86::GR32RegClass, &X86::GR32RegClass,
+ &X86::GR32RegClass, &X86::GR32RegClass, 0
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = {
+ &X86::GR32RegClass, &X86::GR32RegClass,
+ &X86::GR32RegClass, &X86::GR32RegClass,
+ &X86::GR32RegClass, &X86::GR32RegClass, 0
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = {
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass, 0
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClasses64EHRet[] = {
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass, 0
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClassesWin64[] = {
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::GR64RegClass, &X86::GR64RegClass,
+ &X86::VR128RegClass, &X86::VR128RegClass,
+ &X86::VR128RegClass, &X86::VR128RegClass,
+ &X86::VR128RegClass, &X86::VR128RegClass,
+ &X86::VR128RegClass, &X86::VR128RegClass,
+ &X86::VR128RegClass, &X86::VR128RegClass, 0
+ };
+
+ if (Is64Bit) {
+ if (IsWin64)
+ return CalleeSavedRegClassesWin64;
+ else
+ return (callsEHReturn ?
+ CalleeSavedRegClasses64EHRet : CalleeSavedRegClasses64Bit);
+ } else {
+ return (callsEHReturn ?
+ CalleeSavedRegClasses32EHRet : CalleeSavedRegClasses32Bit);
+ }
+}
+
+BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ // Set the stack-pointer register and its aliases as reserved.
+ Reserved.set(X86::RSP);
+ Reserved.set(X86::ESP);
+ Reserved.set(X86::SP);
+ Reserved.set(X86::SPL);
+ // Set the frame-pointer register and its aliases as reserved if needed.
+ if (hasFP(MF)) {
+ Reserved.set(X86::RBP);
+ Reserved.set(X86::EBP);
+ Reserved.set(X86::BP);
+ Reserved.set(X86::BPL);
+ }
+ // Mark the x87 stack registers as reserved, since they don't
+ // behave normally with respect to liveness. We don't fully
+ // model the effects of x87 stack pushes and pops after
+ // stackification.
+ Reserved.set(X86::ST0);
+ Reserved.set(X86::ST1);
+ Reserved.set(X86::ST2);
+ Reserved.set(X86::ST3);
+ Reserved.set(X86::ST4);
+ Reserved.set(X86::ST5);
+ Reserved.set(X86::ST6);
+ Reserved.set(X86::ST7);
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
+ unsigned MaxAlign = 0;
+ for (int i = FFI->getObjectIndexBegin(),
+ e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (FFI->isDeadObjectIndex(i))
+ continue;
+ unsigned Align = FFI->getObjectAlignment(i);
+ MaxAlign = std::max(MaxAlign, Align);
+ }
+
+ return MaxAlign;
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+//
+bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+
+ return (NoFramePointerElim ||
+ needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken() ||
+ MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+ (MMI && MMI->callsUnwindInit()));
+}
+
+bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // FIXME: Currently we don't support stack realignment for functions with
+ // variable-sized allocas
+ return (RealignStack &&
+ (MFI->getMaxAlignment() > StackAlign &&
+ !MFI->hasVarSizedObjects()));
+}
+
+bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+int
+X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
+ int Offset = MF.getFrameInfo()->getObjectOffset(FI) + SlotSize;
+ uint64_t StackSize = MF.getFrameInfo()->getStackSize();
+
+ if (needsStackRealignment(MF)) {
+ if (FI < 0)
+ // Skip the saved EBP
+ Offset += SlotSize;
+ else {
+ unsigned Align = MF.getFrameInfo()->getObjectAlignment(FI);
+ assert( (-(Offset + StackSize)) % Align == 0);
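+ // Align is only referenced by the assert above; the dead store below keeps
+ // release builds (where the assert compiles away) warning-free.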
+ Align = 0;
+ return Offset + StackSize;
+ }
+
+ // FIXME: Support tail calls
+ } else {
+ if (!hasFP(MF))
+ return Offset + StackSize;
+
+ // Skip the saved EBP
+ Offset += SlotSize;
+
+ // Skip the RETADDR move area
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta;
+ }
+
+ return Offset;
+}
+
+void X86RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+ // If the stack pointer can be changed after prologue, turn the
+ // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
+ // adjcallstackup instruction into 'add ESP, <amt>'.
+ // TODO: consider using push / pop instead of sub + store / add
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
+
+ MachineInstr *New = 0;
+ if (Old->getOpcode() == getCallFrameSetupOpcode()) {
+ New = BuildMI(MF, Old->getDebugLoc(),
+ TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri),
+ StackPtr).addReg(StackPtr).addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == getCallFrameDestroyOpcode());
+ // factor out the amount the callee already popped.
+ uint64_t CalleeAmt = Old->getOperand(1).getImm();
+ Amount -= CalleeAmt;
+ if (Amount) {
+ unsigned Opc = (Amount < 128) ?
+ (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
+ (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
+ New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
+ }
+ }
+
+ if (New) {
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+ } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+ if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
+ unsigned Opc = (CalleeAmt < 128) ?
+ (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
+ (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
+ MachineInstr *Old = I;
+ MachineInstr *New =
+ BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
+ StackPtr).addReg(StackPtr).addImm(CalleeAmt);
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const{
+ assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ unsigned BasePtr;
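+ // When realigning the stack, fixed objects (negative frame indices) are
+ // addressed off the frame pointer, while locals use the realigned stack
+ // pointer.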
+ if (needsStackRealignment(MF))
+ BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
+ else
+ BasePtr = (hasFP(MF) ? FramePtr : StackPtr);
+
+ // This must be part of a four-operand memory reference. Replace the
+ // FrameIndex with the chosen base register and add an offset to the offset.
+ MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+ // Now add the frame object offset to the offset from EBP.
+ if (MI.getOperand(i+3).isImm()) {
+ // Offset is a 32-bit integer.
+ int Offset = getFrameIndexOffset(MF, FrameIndex) +
+ (int)(MI.getOperand(i+3).getImm());
+
+ MI.getOperand(i+3).ChangeToImmediate(Offset);
+ } else {
+ // Offset is symbolic. This is extremely rare.
+ uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) +
+ (uint64_t)MI.getOperand(i+3).getOffset();
+ MI.getOperand(i+3).setOffset(Offset);
+ }
+}
+
+void
+X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+
+ // Calculate and set max stack object alignment early, so we can decide
+ // whether we will need stack realignment (and thus FP).
+ unsigned MaxAlign = std::max(FFI->getMaxAlignment(),
+ calculateMaxStackAlignment(FFI));
+
+ FFI->setMaxAlignment(MaxAlign);
+}
+
+void
+X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0) {
+ // create RETURNADDR area
+ // arg
+ // arg
+ // RETADDR
+ // { ...
+ // RETADDR area
+ // ...
+ // }
+ // [EBP]
+ MF.getFrameInfo()->
+ CreateFixedObject(-TailCallReturnAddrDelta,
+ (-1*SlotSize)+TailCallReturnAddrDelta);
+ }
+ if (hasFP(MF)) {
+ assert((TailCallReturnAddrDelta <= 0) &&
+ "The Delta should always be zero or negative");
+ // Create a frame entry for the EBP register that must be saved.
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+ (int)SlotSize * -2+
+ TailCallReturnAddrDelta);
+ assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ "Slot for EBP register must be last in order to be found!");
+ FrameIdx = 0;
+ }
+}
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
+ const TargetInstrInfo &TII) {
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+ unsigned Opc = isSub
+ ? ((Offset < 128) ?
+ (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
+ (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri))
+ : ((Offset < 128) ?
+ (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
+ (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri));
+ uint64_t Chunk = (1LL << 31) - 1;
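+ // Emit the adjustment in chunks that fit the 32-bit immediate field.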
+ DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
+ DebugLoc::getUnknownLoc());
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(ThisVal);
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ Offset -= ThisVal;
+ }
+}
+
+// mergeSPUpdatesUp - If the instruction immediately before MBBI adjusts the
+// stack pointer, fold its adjustment into *NumBytes and erase it.
+static
+void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ if (MBBI == MBB.begin()) return;
+
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ }
+}
+
+// mergeSPUpdatesDown - If the instruction immediately after MBBI adjusts the
+// stack pointer, fold its adjustment into *NumBytes and erase it. Note that
+// the early return below currently disables this merge.
+static
+void mergeSPUpdatesDown(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ return;
+
+ if (MBBI == MBB.end()) return;
+
+ MachineBasicBlock::iterator NI = next(MBBI);
+ if (NI == MBB.end()) return;
+
+ unsigned Opc = NI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ }
+}
+
+/// mergeSPUpdates - Check the instruction before/after the passed
+/// instruction. If it is an ADD or SUB of the stack pointer, it is deleted
+/// and the stack adjustment is returned as a positive value for ADD and a
+/// negative value for SUB.
+static int mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr,
+ bool doMergeWithPrevious) {
+
+ if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
+ (!doMergeWithPrevious && MBBI == MBB.end()))
+ return 0;
+
+ int Offset = 0;
+
+ MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
+ MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr){
+ Offset += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ Offset -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ }
+
+ return Offset;
+}
+
+void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
+ unsigned FrameLabelId,
+ unsigned ReadyLabelId) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ if (!MMI)
+ return;
+
+ uint64_t StackSize = MFI->getStackSize();
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ const TargetData *TD = MF.getTarget().getTargetData();
+
+ // Calculate the number of bytes used for storing the return address.
+ int stackGrowth =
+ (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
+ TargetFrameInfo::StackGrowsUp ?
+ TD->getPointerSize() : -TD->getPointerSize());
+
+ if (StackSize) {
+ // Show update of SP.
+ if (hasFP(MF)) {
+ // Adjust SP
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, 2*stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ } else {
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP,
+ -StackSize+stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ }
+ } else {
+ //FIXME: Verify & implement for FP
+ MachineLocation SPDst(StackPtr);
+ MachineLocation SPSrc(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ }
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ // FIXME: This is a dirty hack. The code itself is pretty messy right now.
+ // It should be rewritten from scratch and generalized at some point.
+
+ // Determine maximum offset (minimum due to stack growth).
+ int64_t MaxOffset = 0;
+ for (unsigned I = 0, E = CSI.size(); I!=E; ++I)
+ MaxOffset = std::min(MaxOffset,
+ MFI->getObjectOffset(CSI[I].getFrameIdx()));
+
+ // Calculate offsets
+ int64_t saveAreaOffset = (hasFP(MF) ? 3 : 2)*stackGrowth;
+ for (unsigned I = 0, E = CSI.size(); I!=E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ Offset = (MaxOffset-Offset+saveAreaOffset);
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ }
+
+ if (hasFP(MF)) {
+ // Save FP
+ MachineLocation FPDst(MachineLocation::VirtualFP, 2*stackGrowth);
+ MachineLocation FPSrc(FramePtr);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ }
+
+ MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+}
+
+
+void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function* Fn = MF.getFunction();
+ const X86Subtarget* Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
+ !Fn->doesNotThrow() ||
+ UnwindTablesMandatory;
+ DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
+ DebugLoc::getUnknownLoc());
+
+ // Prepare for frame info.
+ unsigned FrameLabelId = 0;
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ uint64_t StackSize = MFI->getStackSize();
+
+ // Get desired stack alignment
+ uint64_t MaxAlign = MFI->getMaxAlignment();
+
+ // Add RETADDR move area to callee saved frame size.
+ int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+ if (TailCallReturnAddrDelta < 0)
+ X86FI->setCalleeSavedFrameSize(
+ X86FI->getCalleeSavedFrameSize() +(-TailCallReturnAddrDelta));
+
+ // If this is x86-64, the Red Zone is not disabled, and we are a leaf
+ // function that uses up to 128 bytes of stack space and has no frame
+ // pointer, calls, or dynamic allocas, then we do not need to adjust the
+ // stack pointer (we fit in the Red Zone).
+ if (Is64Bit && !DisableRedZone &&
+ !needsStackRealignment(MF) &&
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->hasCalls()) { // No calls.
+ uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
+ if (hasFP(MF)) MinSize += SlotSize;
+ StackSize = std::max(MinSize,
+ StackSize > 128 ? StackSize - 128 : 0);
+ MFI->setStackSize(StackSize);
+ }
+
+ // Insert stack pointer adjustment for later moving of return addr. Only
+ // applies to tail-call-optimized functions where the callee argument stack
+ // size is bigger than the caller's.
+ if (TailCallReturnAddrDelta < 0) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri),
+ StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta);
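+ // 64-bit absolute relocation: add the full pointer-sized value.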
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+
+ uint64_t NumBytes = 0;
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+
+ NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+
+ // Get the offset of the stack slot for the EBP register... which is
+ // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+ // Update the frame offset adjustment.
+ MFI->setOffsetAdjustment(-NumBytes);
+
+ // Save EBP into the appropriate stack slot...
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+ .addReg(FramePtr, RegState::Kill);
+
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId);
+ }
+
+ // Update EBP with the new base value...
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
+ .addReg(StackPtr);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ I != E; ++I)
+ I->addLiveIn(FramePtr);
+
+ // Realign stack
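+ // ANDing the stack pointer with -MaxAlign rounds it down to the required
+ // alignment.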
+ if (needsStackRealignment(MF)) {
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
+ StackPtr).addReg(StackPtr).addImm(-MaxAlign);
+ // The EFLAGS implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ }
+ } else {
+ NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+ }
+
+ unsigned ReadyLabelId = 0;
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer is ready.
+ ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
+ }
+
+ // Skip the callee-saved push instructions.
+ while (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == X86::PUSH32r ||
+ MBBI->getOpcode() == X86::PUSH64r))
+ ++MBBI;
+
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+
+ if (NumBytes) { // adjust stack pointer: ESP -= numbytes
+ if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
+ // Check whether EAX is live-in for this function.
+ bool isEAXAlive = false;
+ for (MachineRegisterInfo::livein_iterator
+ II = MF.getRegInfo().livein_begin(),
+ EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
+ unsigned Reg = II->first;
+ isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL);
+ }
+
+ // Function prologue calls _alloca to probe the stack when allocating
+ // more than 4k bytes in one go. Touching the stack at 4K increments is
+ // necessary to ensure that the guard pages used by the OS virtual memory
+ // manager are allocated in correct sequence.
+ if (!isEAXAlive) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("_alloca");
+ } else {
+ // Save EAX
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+ .addReg(X86::EAX, RegState::Kill);
+ // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
+ // allocated bytes for EAX.
+ BuildMI(MBB, MBBI, DL,
+ TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes-4);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("_alloca");
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes-4);
+ MBB.insert(MBBI, MI);
+ }
+ } else {
+ // If there is a SUB32ri of ESP immediately before this instruction,
+ // merge the two. This can be the case when tail call elimination is
+ // enabled and the callee has more arguments than the caller.
+ NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ // If there is an ADD32ri or SUB32ri of ESP immediately after this
+ // instruction, merge the two instructions.
+ mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+
+ if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
+ }
+ }
+
+ if (needsFrameMoves)
+ emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
+}
+
+void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc DL = MBBI->getDebugLoc();
+
+ switch (RetOpcode) {
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNdi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64:
+ case X86::TAILJMPd:
+ case X86::TAILJMPr:
+ case X86::TAILJMPm: break; // These are ok
+ default:
+ assert(0 && "Can only insert epilog into returning blocks");
+ }
+
+ // Get the number of bytes to allocate from the FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
+ uint64_t MaxAlign = MFI->getMaxAlignment();
+ unsigned CSSize = X86FI->getCalleeSavedFrameSize();
+ uint64_t NumBytes = 0;
+
+ if (hasFP(MF)) {
+ // Calculate required stack adjustment
+ uint64_t FrameSize = StackSize - SlotSize;
+ if (needsStackRealignment(MF))
+ FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
+
+ NumBytes = FrameSize - CSSize;
+
+ // pop EBP.
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
+ } else {
+ NumBytes = StackSize - CSSize;
+ }
+
+ // Skip the callee-saved pop instructions.
+ MachineBasicBlock::iterator LastCSPop = MBBI;
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if (Opc != X86::POP32r && Opc != X86::POP64r &&
+ !PI->getDesc().isTerminator())
+ break;
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately before this
+ // instruction, merge the two instructions.
+ if (NumBytes || MFI->hasVarSizedObjects())
+ mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ // If dynamic alloca is used, then reset ESP to point to the last callee-saved
+ // slot before popping them off! The same applies when the stack was
+ // realigned.
+ if (needsStackRealignment(MF)) {
+ // We cannot use LEA here because the stack pointer was realigned. We need
+ // to deallocate the local frame first.
+ if (CSSize) {
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
+ MBBI = prior(LastCSPop);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(FramePtr);
+ } else if (MFI->hasVarSizedObjects()) {
+ if (CSSize) {
+ unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
+ MachineInstr *MI = addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
+ MBB.insert(MBBI, MI);
+ } else
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(FramePtr);
+
+ } else {
+ // adjust stack pointer back: ESP += numbytes
+ if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
+ }
+
+ // We're returning from function via eh_return.
+ if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
+ MBBI = prior(MBB.end());
+ MachineOperand &DestAddr = MBBI->getOperand(0);
+ assert(DestAddr.isReg() && "Offset should be in register!");
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(DestAddr.getReg());
+ // Tail call return: adjust the stack pointer and jump to callee
+ } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
+ RetOpcode== X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int MaxTCDelta = X86FI->getTCReturnAddrDelta();
+ int Offset = 0;
+ assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
+ // Incorporate the retaddr area.
+ Offset = StackAdj-MaxTCDelta;
+ assert(Offset >= 0 && "Offset should never be negative");
+
+ if (Offset) {
+ // Check for possible merge with preceding ADD instruction.
+ Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
+ }
+
+ // Jump to label or value in register.
+ if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64)
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPd)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ else if (RetOpcode== X86::TCRETURNri64)
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
+ else
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg());
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
+ (X86FI->getTCReturnAddrDelta() < 0)) {
+ // Add the return addr area delta back since we are not tail calling.
+ int delta = -1*X86FI->getTCReturnAddrDelta();
+ MBBI = prior(MBB.end());
+ // Check for possible merge with preceding ADD instruction.
+ delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
+ }
+}
+
+unsigned X86RegisterInfo::getRARegister() const {
+ if (Is64Bit)
+ return X86::RIP; // Should have dwarf #16
+ else
+ return X86::EIP; // Should have dwarf #8
+}
+
+unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ return hasFP(MF) ? FramePtr : StackPtr;
+}
+
+void X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Calculate the number of bytes used for storing the return address.
+ int stackGrowth = (Is64Bit ? -8 : -4);
+
+ // Initial state of the frame pointer is esp+4.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(StackPtr, stackGrowth);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // Add return address to move list
+ MachineLocation CSDst(StackPtr, stackGrowth);
+ MachineLocation CSSrc(getRARegister());
+ Moves.push_back(MachineMove(0, CSDst, CSSrc));
+}
+
+unsigned X86RegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned X86RegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+namespace llvm {
+unsigned getX86SubSuperRegister(unsigned Reg, MVT VT, bool High) {
+ switch (VT.getSimpleVT()) {
+ default: return Reg;
+ case MVT::i8:
+ if (High) {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AH;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DH;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CH;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BH;
+ }
+ } else {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AL;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DL;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CL;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BL;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SIL;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DIL;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BPL;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SPL;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8B;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9B;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10B;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11B;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12B;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13B;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14B;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15B;
+ }
+ }
+ case MVT::i16:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8W;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9W;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10W;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11W;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12W;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13W;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14W;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15W;
+ }
+ case MVT::i32:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::EAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::EDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::ECX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::EBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::ESI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::EDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::EBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::ESP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8D;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9D;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10D;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11D;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12D;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13D;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14D;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15D;
+ }
+ case MVT::i64:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::RAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::RDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::RCX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::RBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::RSI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::RDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::RBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::RSP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15;
+ }
+ }
+
+ return Reg;
+}
+}
+
+#include "X86GenRegisterInfo.inc"
+
+namespace {
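+ // MSAC - A machine-function pass that computes the maximum stack alignment
+ // the function will need, so later passes can decide on stack realignment.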
+ struct VISIBILITY_HIDDEN MSAC : public MachineFunctionPass {
+ static char ID;
+ MSAC() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineRegisterInfo &RI = MF.getRegInfo();
+
+ // Calculate max stack alignment of all already allocated stack objects.
+ unsigned MaxAlign = calculateMaxStackAlignment(FFI);
+
+ // Be over-conservative: scan over all vreg defs and find whether vector
+ // registers are used. If yes, there is a probability that a vector register
+ // will be spilled and thus the stack needs to be aligned properly.
+ for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
+ RegNum < RI.getLastVirtReg(); ++RegNum)
+ MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
+
+ FFI->setMaxAlignment(MaxAlign);
+
+ return false;
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 Maximal Stack Alignment Calculator";
+ }
+ };
+
+ char MSAC::ID = 0;
+}
+
+FunctionPass*
+llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); }
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
new file mode 100644
index 000000000000..33b9f5edc73a
--- /dev/null
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -0,0 +1,163 @@
+//===- X86RegisterInfo.h - X86 Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86REGISTERINFO_H
+#define X86REGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "X86GenRegisterInfo.h.inc"
+
+namespace llvm {
+ class Type;
+ class TargetInstrInfo;
+ class X86TargetMachine;
+
+/// N86 namespace - Native X86 register numbers
+///
+namespace N86 {
+ enum {
+ EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7
+ };
+}
+
+namespace X86 {
+ /// SubregIndex - The index of various sized subregister classes. Note that
+ /// these indices must be kept in sync with the class indices in the
+ /// X86RegisterInfo.td file.
+ enum SubregIndex {
+ SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4
+ };
+}
+
+/// DWARFFlavour - Flavour of DWARF register numbers.
+///
+namespace DWARFFlavour {
+ enum {
+ X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
+ };
+}
+
+class X86RegisterInfo : public X86GenRegisterInfo {
+public:
+ X86TargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+private:
+ /// Is64Bit - Is the target 64-bit?
+ ///
+ bool Is64Bit;
+
+ /// IsWin64 - Is the target one of the Win64 flavours?
+ ///
+ bool IsWin64;
+
+ /// SlotSize - Stack slot size in bytes.
+ ///
+ unsigned SlotSize;
+
+ /// StackAlign - Default stack alignment.
+ ///
+ unsigned StackAlign;
+
+ /// StackPtr - X86 physical register used as stack ptr.
+ ///
+ unsigned StackPtr;
+
+ /// FramePtr - X86 physical register used as frame ptr.
+ ///
+ unsigned FramePtr;
+
+public:
+ X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
+
+ /// getX86RegNum - Returns the native X86 register number for the given LLVM
+ /// register identifier.
+ static unsigned getX86RegNum(unsigned RegNo);
+
+ unsigned getStackAlignment() const { return StackAlign; }
+
+ /// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum
+ /// (created by TableGen) for target dependencies.
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+ /// Code Generation virtual methods...
+ ///
+
+ /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
+ /// values.
+ const TargetRegisterClass *getPointerRegClass() const;
+
+ /// getCrossCopyRegClass - Returns a legal register class to copy a register
+ /// in the specified class to or from. Returns NULL if it is possible to copy
+ /// between two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+
+ /// getCalleeSavedRegs - Return a null-terminated list of all of the
+ /// callee-save registers on this target.
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ /// getCalleeSavedRegClasses - Return a null-terminated list of the preferred
+ /// register classes to spill each callee-saved register with. The order and
+ /// length of this list match the getCalleeSavedRegs() list.
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
+
+ /// getReservedRegs - Returns a bitset indexed by physical register number
+ /// indicating if a register is a special register that has particular uses and
+ /// should be considered unavailable at all times, e.g. SP, RA. This is used by
+ /// the register scavenger to determine which registers are free.
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ bool needsStackRealignment(const MachineFunction &MF) const;
+
+ bool hasReservedCallFrame(MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ void emitFrameMoves(MachineFunction &MF,
+ unsigned FrameLabelId, unsigned ReadyLabelId) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+ int getFrameIndexOffset(MachineFunction &MF, int FI) const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+// getX86SubSuperRegister - X86 utility function. It returns the sub or super
+// register of a specific X86 register.
+// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX.
+unsigned getX86SubSuperRegister(unsigned, MVT, bool High=false);
+
+} // End llvm namespace
+
+#endif
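For illustration only (not part of the diff), the getX86SubSuperRegister helper declared
above maps between register widths roughly as follows; the return values are inferred from
its documentation comment:

  unsigned Lo16 = getX86SubSuperRegister(X86::EAX, MVT::i16);      // X86::AX
  unsigned Hi8  = getX86SubSuperRegister(X86::EAX, MVT::i8, true); // X86::AH (high byte)
  unsigned Sup  = getX86SubSuperRegister(X86::AX,  MVT::i32);      // X86::EAX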
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
new file mode 100644
index 000000000000..d552cb3ab8e9
--- /dev/null
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -0,0 +1,762 @@
+//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 Register file, defining the registers themselves,
+// aliases between the registers, and the register classes built out of the
+// registers.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Register definitions...
+//
+let Namespace = "X86" in {
+
+ // In the register alias definitions below, we define which registers alias
+ // which others. We only specify which registers the small registers alias,
+ // because the register file generator is smart enough to figure out that
+ // AL aliases AX if we tell it that AX aliases AL (for example).
+
+ // Dwarf numbering is different for 32-bit and 64-bit, and there are
+ // variations by target as well. Currently the first entry is for X86-64,
+ // the second is for EH on X86-32/Darwin, and the third is the 'generic' one
+ // (X86-32/Linux and debug information on X86-32/Darwin).
+
+ // 8-bit registers
+ // Low registers
+ def AL : Register<"al">, DwarfRegNum<[0, 0, 0]>;
+ def DL : Register<"dl">, DwarfRegNum<[1, 2, 2]>;
+ def CL : Register<"cl">, DwarfRegNum<[2, 1, 1]>;
+ def BL : Register<"bl">, DwarfRegNum<[3, 3, 3]>;
+
+ // X86-64 only
+ def SIL : Register<"sil">, DwarfRegNum<[4, 6, 6]>;
+ def DIL : Register<"dil">, DwarfRegNum<[5, 7, 7]>;
+ def BPL : Register<"bpl">, DwarfRegNum<[6, 4, 5]>;
+ def SPL : Register<"spl">, DwarfRegNum<[7, 5, 4]>;
+ def R8B : Register<"r8b">, DwarfRegNum<[8, -2, -2]>;
+ def R9B : Register<"r9b">, DwarfRegNum<[9, -2, -2]>;
+ def R10B : Register<"r10b">, DwarfRegNum<[10, -2, -2]>;
+ def R11B : Register<"r11b">, DwarfRegNum<[11, -2, -2]>;
+ def R12B : Register<"r12b">, DwarfRegNum<[12, -2, -2]>;
+ def R13B : Register<"r13b">, DwarfRegNum<[13, -2, -2]>;
+ def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>;
+ def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>;
+
+ // High registers. On x86-64, these cannot be used in any instruction
+ // with a REX prefix.
+ def AH : Register<"ah">, DwarfRegNum<[0, 0, 0]>;
+ def DH : Register<"dh">, DwarfRegNum<[1, 2, 2]>;
+ def CH : Register<"ch">, DwarfRegNum<[2, 1, 1]>;
+ def BH : Register<"bh">, DwarfRegNum<[3, 3, 3]>;
+
+ // 16-bit registers
+ def AX : RegisterWithSubRegs<"ax", [AL,AH]>, DwarfRegNum<[0, 0, 0]>;
+ def DX : RegisterWithSubRegs<"dx", [DL,DH]>, DwarfRegNum<[1, 2, 2]>;
+ def CX : RegisterWithSubRegs<"cx", [CL,CH]>, DwarfRegNum<[2, 1, 1]>;
+ def BX : RegisterWithSubRegs<"bx", [BL,BH]>, DwarfRegNum<[3, 3, 3]>;
+ def SI : RegisterWithSubRegs<"si", [SIL]>, DwarfRegNum<[4, 6, 6]>;
+ def DI : RegisterWithSubRegs<"di", [DIL]>, DwarfRegNum<[5, 7, 7]>;
+ def BP : RegisterWithSubRegs<"bp", [BPL]>, DwarfRegNum<[6, 4, 5]>;
+ def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>;
+ def IP : Register<"ip">, DwarfRegNum<[16]>;
+
+ // X86-64 only
+ def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
+ def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>;
+ def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>;
+ def R11W : RegisterWithSubRegs<"r11w", [R11B]>, DwarfRegNum<[11, -2, -2]>;
+ def R12W : RegisterWithSubRegs<"r12w", [R12B]>, DwarfRegNum<[12, -2, -2]>;
+ def R13W : RegisterWithSubRegs<"r13w", [R13B]>, DwarfRegNum<[13, -2, -2]>;
+ def R14W : RegisterWithSubRegs<"r14w", [R14B]>, DwarfRegNum<[14, -2, -2]>;
+ def R15W : RegisterWithSubRegs<"r15w", [R15B]>, DwarfRegNum<[15, -2, -2]>;
+
+ // 32-bit registers
+ def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[0, 0, 0]>;
+ def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[1, 2, 2]>;
+ def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[2, 1, 1]>;
+ def EBX : RegisterWithSubRegs<"ebx", [BX]>, DwarfRegNum<[3, 3, 3]>;
+ def ESI : RegisterWithSubRegs<"esi", [SI]>, DwarfRegNum<[4, 6, 6]>;
+ def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[5, 7, 7]>;
+ def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[6, 4, 5]>;
+ def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>;
+ def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
+
+ // X86-64 only
+ def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>;
+ def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>;
+ def R10D : RegisterWithSubRegs<"r10d", [R10W]>, DwarfRegNum<[10, -2, -2]>;
+ def R11D : RegisterWithSubRegs<"r11d", [R11W]>, DwarfRegNum<[11, -2, -2]>;
+ def R12D : RegisterWithSubRegs<"r12d", [R12W]>, DwarfRegNum<[12, -2, -2]>;
+ def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>;
+ def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>;
+ def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>;
+
+ // 64-bit registers, X86-64 only
+ def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>;
+ def RDX : RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>;
+ def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>;
+ def RBX : RegisterWithSubRegs<"rbx", [EBX]>, DwarfRegNum<[3, -2, -2]>;
+ def RSI : RegisterWithSubRegs<"rsi", [ESI]>, DwarfRegNum<[4, -2, -2]>;
+ def RDI : RegisterWithSubRegs<"rdi", [EDI]>, DwarfRegNum<[5, -2, -2]>;
+ def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>;
+ def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>;
+
+ def R8 : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>;
+ def R9 : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>;
+ def R10 : RegisterWithSubRegs<"r10", [R10D]>, DwarfRegNum<[10, -2, -2]>;
+ def R11 : RegisterWithSubRegs<"r11", [R11D]>, DwarfRegNum<[11, -2, -2]>;
+ def R12 : RegisterWithSubRegs<"r12", [R12D]>, DwarfRegNum<[12, -2, -2]>;
+ def R13 : RegisterWithSubRegs<"r13", [R13D]>, DwarfRegNum<[13, -2, -2]>;
+ def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
+ def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
+ def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>;
+
+ // MMX Registers. These are actually aliased to ST0 .. ST7
+ def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
+ def MM1 : Register<"mm1">, DwarfRegNum<[42, 30, 30]>;
+ def MM2 : Register<"mm2">, DwarfRegNum<[43, 31, 31]>;
+ def MM3 : Register<"mm3">, DwarfRegNum<[44, 32, 32]>;
+ def MM4 : Register<"mm4">, DwarfRegNum<[45, 33, 33]>;
+ def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>;
+ def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>;
+ def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>;
+
+ // Pseudo Floating Point registers
+ def FP0 : Register<"fp0">;
+ def FP1 : Register<"fp1">;
+ def FP2 : Register<"fp2">;
+ def FP3 : Register<"fp3">;
+ def FP4 : Register<"fp4">;
+ def FP5 : Register<"fp5">;
+ def FP6 : Register<"fp6">;
+
+ // XMM Registers, used by the various SSE instruction set extensions
+ def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
+ def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
+ def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
+ def XMM3: Register<"xmm3">, DwarfRegNum<[20, 24, 24]>;
+ def XMM4: Register<"xmm4">, DwarfRegNum<[21, 25, 25]>;
+ def XMM5: Register<"xmm5">, DwarfRegNum<[22, 26, 26]>;
+ def XMM6: Register<"xmm6">, DwarfRegNum<[23, 27, 27]>;
+ def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>;
+
+ // X86-64 only
+ def XMM8: Register<"xmm8">, DwarfRegNum<[25, -2, -2]>;
+ def XMM9: Register<"xmm9">, DwarfRegNum<[26, -2, -2]>;
+ def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>;
+ def XMM11: Register<"xmm11">, DwarfRegNum<[28, -2, -2]>;
+ def XMM12: Register<"xmm12">, DwarfRegNum<[29, -2, -2]>;
+ def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
+ def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
+ def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
+
+ // Floating point stack registers
+ def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
+ def ST1 : Register<"st(1)">, DwarfRegNum<[34, 13, 12]>;
+ def ST2 : Register<"st(2)">, DwarfRegNum<[35, 14, 13]>;
+ def ST3 : Register<"st(3)">, DwarfRegNum<[36, 15, 14]>;
+ def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>;
+ def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>;
+ def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>;
+ def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>;
+
+ // Status flags register
+ def EFLAGS : Register<"flags">;
+
+ // Segment registers
+ def CS : Register<"cs">;
+ def DS : Register<"ds">;
+ def SS : Register<"ss">;
+ def ES : Register<"es">;
+ def FS : Register<"fs">;
+ def GS : Register<"gs">;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Subregister Set Definitions... now that we have all of the pieces, define the
+// sub registers for each register.
+//
+
+def x86_subreg_8bit : PatLeaf<(i32 1)>;
+def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
+def x86_subreg_16bit : PatLeaf<(i32 3)>;
+def x86_subreg_32bit : PatLeaf<(i32 4)>;
+
+def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI,
+ R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
+ [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
+ R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+
+def : SubRegSet<2, [AX, CX, DX, BX],
+ [AH, CH, DH, BH]>;
+
+def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
+ R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
+ [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
+ R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+
+def : SubRegSet<2, [EAX, ECX, EDX, EBX],
+ [AH, CH, DH, BH]>;
+
+def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
+ R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
+ [AX, CX, DX, BX, SP, BP, SI, DI,
+ R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
+
+def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
+ R8, R9, R10, R11, R12, R13, R14, R15],
+ [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
+ R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+
+def : SubRegSet<2, [RAX, RCX, RDX, RBX],
+ [AH, CH, DH, BH]>;
+
+def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
+ R8, R9, R10, R11, R12, R13, R14, R15],
+ [AX, CX, DX, BX, SP, BP, SI, DI,
+ R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
+
+def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
+ R8, R9, R10, R11, R12, R13, R14, R15],
+ [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
+ R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
+
+//===----------------------------------------------------------------------===//
+// Register Class Definitions... now that we have all of the pieces, define the
+// top-level register classes. The order specified in the register list is
+// implicitly defined to be the register allocation order.
+//
+
+// List call-clobbered registers before callee-save registers. RBX, RBP, (and
+// R12, R13, R14, and R15 for X86-64) are callee-save registers.
+// In 64-bit mode, there are 12 additional i8 registers: SIL, DIL, BPL, SPL, and
+// R8B, ... R15B.
+// Allocate R12 and R13 last, as these require an extra byte when
+// encoded in x86_64 instructions.
+// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
+// 64-bit mode. The main complication is that they cannot be encoded in an
+// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
+// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
+// cannot be encoded.
+def GR8 : RegisterClass<"X86", [i8], 8,
+ [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL,
+ R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate SPL or BPL.
+ static const unsigned X86_GR8_AO_64_fp[] = {
+ X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
+ X86::R8B, X86::R9B, X86::R10B, X86::R11B,
+ X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B
+ };
+ // If not, just don't allocate SPL.
+ static const unsigned X86_GR8_AO_64[] = {
+ X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
+ X86::R8B, X86::R9B, X86::R10B, X86::R11B,
+ X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL
+ };
+ // In 32-bit mode, none of the 8-bit registers aliases EBP or ESP.
+ static const unsigned X86_GR8_AO_32[] = {
+ X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH
+ };
+
+ GR8Class::iterator
+ GR8Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return X86_GR8_AO_32;
+ else if (RI->hasFP(MF))
+ return X86_GR8_AO_64_fp;
+ else
+ return X86_GR8_AO_64;
+ }
+
+ GR8Class::iterator
+ GR8Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return X86_GR8_AO_32 + (sizeof(X86_GR8_AO_32) / sizeof(unsigned));
+ else if (RI->hasFP(MF))
+ return X86_GR8_AO_64_fp + (sizeof(X86_GR8_AO_64_fp) / sizeof(unsigned));
+ else
+ return X86_GR8_AO_64 + (sizeof(X86_GR8_AO_64) / sizeof(unsigned));
+ }
+ }];
+}
+
+
+def GR16 : RegisterClass<"X86", [i16], 16,
+ [AX, CX, DX, SI, DI, BX, BP, SP,
+ R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
+ let SubRegClassList = [GR8, GR8];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate SP or BP.
+ static const unsigned X86_GR16_AO_64_fp[] = {
+ X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
+ X86::R8W, X86::R9W, X86::R10W, X86::R11W,
+ X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W
+ };
+ static const unsigned X86_GR16_AO_32_fp[] = {
+ X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX
+ };
+ // If not, just don't allocate SP.
+ static const unsigned X86_GR16_AO_64[] = {
+ X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
+ X86::R8W, X86::R9W, X86::R10W, X86::R11W,
+ X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP
+ };
+ static const unsigned X86_GR16_AO_32[] = {
+ X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP
+ };
+
+ GR16Class::iterator
+ GR16Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_64_fp;
+ else
+ return X86_GR16_AO_64;
+ } else {
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_32_fp;
+ else
+ return X86_GR16_AO_32;
+ }
+ }
+
+ GR16Class::iterator
+ GR16Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_64_fp+(sizeof(X86_GR16_AO_64_fp)/sizeof(unsigned));
+ else
+ return X86_GR16_AO_64 + (sizeof(X86_GR16_AO_64) / sizeof(unsigned));
+ } else {
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_32_fp+(sizeof(X86_GR16_AO_32_fp)/sizeof(unsigned));
+ else
+ return X86_GR16_AO_32 + (sizeof(X86_GR16_AO_32) / sizeof(unsigned));
+ }
+ }
+ }];
+}
+
+
+def GR32 : RegisterClass<"X86", [i32], 32,
+ [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
+ let SubRegClassList = [GR8, GR8, GR16];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate ESP or EBP.
+ static const unsigned X86_GR32_AO_64_fp[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
+ X86::R8D, X86::R9D, X86::R10D, X86::R11D,
+ X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D
+ };
+ static const unsigned X86_GR32_AO_32_fp[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX
+ };
+ // If not, just don't allocate ESP.
+ static const unsigned X86_GR32_AO_64[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
+ X86::R8D, X86::R9D, X86::R10D, X86::R11D,
+ X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP
+ };
+ static const unsigned X86_GR32_AO_32[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP
+ };
+
+ GR32Class::iterator
+ GR32Class::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (RI->hasFP(MF))
+ return X86_GR32_AO_64_fp;
+ else
+ return X86_GR32_AO_64;
+ } else {
+ if (RI->hasFP(MF))
+ return X86_GR32_AO_32_fp;
+ else
+ return X86_GR32_AO_32;
+ }
+ }
+
+ GR32Class::iterator
+ GR32Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (Subtarget.is64Bit()) {
+ if (RI->hasFP(MF))
+ return X86_GR32_AO_64_fp+(sizeof(X86_GR32_AO_64_fp)/sizeof(unsigned));
+ else
+ return X86_GR32_AO_64 + (sizeof(X86_GR32_AO_64) / sizeof(unsigned));
+ } else {
+ if (RI->hasFP(MF))
+ return X86_GR32_AO_32_fp+(sizeof(X86_GR32_AO_32_fp)/sizeof(unsigned));
+ else
+ return X86_GR32_AO_32 + (sizeof(X86_GR32_AO_32) / sizeof(unsigned));
+ }
+ }
+ }];
+}
+
+
+def GR64 : RegisterClass<"X86", [i64], 64,
+ [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP, RSP]> {
+ let SubRegClassList = [GR8, GR8, GR16, GR32];
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GR64Class::iterator
+ GR64Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return begin(); // None of these are allocatable in 32-bit.
+ if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr?
+ return end()-2; // If so, don't allocate RSP or RBP
+ else
+ return end()-1; // If not, just don't allocate RSP
+ }
+ }];
+}
+
+
+// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
+// GR8, GR16, GR32, and GR64 which contain just the "a", "b", "c", and "d"
+// registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers
+// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
+// and GR64_ABCD are classes for registers that support 8-bit h-register
+// operations.
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> {
+}
+def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> {
+}
+def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
+ let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H];
+}
+def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
+ let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+}
+def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
+ let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD];
+}
+
+// GR8_NOREX, GR16_NOREX, GR32_NOREX, GR64_NOREX - Subclasses of
+// GR8, GR16, GR32, and GR64 which contain only the first 8 GPRs.
+// On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes
+// of registers which do not by themselves require a REX prefix.
+def GR8_NOREX : RegisterClass<"X86", [i8], 8,
+ [AL, CL, DL, BL, AH, CH, DH, BH,
+ SIL, DIL, BPL, SPL]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate SPL or BPL.
+ static const unsigned X86_GR8_NOREX_AO_64_fp[] = {
+ X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL
+ };
+ // If not, just don't allocate SPL.
+ static const unsigned X86_GR8_NOREX_AO_64[] = {
+ X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL, X86::BPL
+ };
+ // In 32-bit mode, none of the 8-bit registers aliases EBP or ESP.
+ static const unsigned X86_GR8_NOREX_AO_32[] = {
+ X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH
+ };
+
+ GR8_NOREXClass::iterator
+ GR8_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return X86_GR8_NOREX_AO_32;
+ else if (RI->hasFP(MF))
+ return X86_GR8_NOREX_AO_64_fp;
+ else
+ return X86_GR8_NOREX_AO_64;
+ }
+
+ GR8_NOREXClass::iterator
+ GR8_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return X86_GR8_NOREX_AO_32 +
+ (sizeof(X86_GR8_NOREX_AO_32) / sizeof(unsigned));
+ else if (RI->hasFP(MF))
+ return X86_GR8_NOREX_AO_64_fp +
+ (sizeof(X86_GR8_NOREX_AO_64_fp) / sizeof(unsigned));
+ else
+ return X86_GR8_NOREX_AO_64 +
+ (sizeof(X86_GR8_NOREX_AO_64) / sizeof(unsigned));
+ }
+ }];
+}
+def GR16_NOREX : RegisterClass<"X86", [i16], 16,
+ [AX, CX, DX, SI, DI, BX, BP, SP]> {
+ let SubRegClassList = [GR8_NOREX, GR8_NOREX];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate SP or BP.
+ static const unsigned X86_GR16_AO_fp[] = {
+ X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX
+ };
+ // If not, just don't allocate SP.
+ static const unsigned X86_GR16_AO[] = {
+ X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP
+ };
+
+ GR16_NOREXClass::iterator
+ GR16_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_fp;
+ else
+ return X86_GR16_AO;
+ }
+
+ GR16_NOREXClass::iterator
+ GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR16_AO_fp+(sizeof(X86_GR16_AO_fp)/sizeof(unsigned));
+ else
+ return X86_GR16_AO + (sizeof(X86_GR16_AO) / sizeof(unsigned));
+ }
+ }];
+}
+// GR32_NOREX - GR32 registers which do not require a REX prefix.
+def GR32_NOREX : RegisterClass<"X86", [i32], 32,
+ [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
+ let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate ESP or EBP.
+ static const unsigned X86_GR32_NOREX_AO_fp[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX
+ };
+ // If not, just don't allocate ESP.
+ static const unsigned X86_GR32_NOREX_AO[] = {
+ X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP
+ };
+
+ GR32_NOREXClass::iterator
+ GR32_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR32_NOREX_AO_fp;
+ else
+ return X86_GR32_NOREX_AO;
+ }
+
+ GR32_NOREXClass::iterator
+ GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR32_NOREX_AO_fp +
+ (sizeof(X86_GR32_NOREX_AO_fp) / sizeof(unsigned));
+ else
+ return X86_GR32_NOREX_AO +
+ (sizeof(X86_GR32_NOREX_AO) / sizeof(unsigned));
+ }
+ }];
+}
+
+// GR64_NOREX - GR64 registers which do not require a REX prefix.
+def GR64_NOREX : RegisterClass<"X86", [i64], 64,
+ [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP]> {
+ let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // Does the function dedicate RBP / EBP to being a frame ptr?
+ // If so, don't allocate RSP or RBP.
+ static const unsigned X86_GR64_NOREX_AO_fp[] = {
+ X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX
+ };
+ // If not, just don't allocate RSP.
+ static const unsigned X86_GR64_NOREX_AO[] = {
+ X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX, X86::RBP
+ };
+
+ GR64_NOREXClass::iterator
+ GR64_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR64_NOREX_AO_fp;
+ else
+ return X86_GR64_NOREX_AO;
+ }
+
+ GR64_NOREXClass::iterator
+ GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return X86_GR64_NOREX_AO_fp +
+ (sizeof(X86_GR64_NOREX_AO_fp) / sizeof(unsigned));
+ else
+ return X86_GR64_NOREX_AO +
+ (sizeof(X86_GR64_NOREX_AO) / sizeof(unsigned));
+ }
+ }];
+}
+
+// A class to support the 'A' assembler constraint: EAX then EDX.
+def GRAD : RegisterClass<"X86", [i32], 32, [EAX, EDX]>;
+
+// Scalar SSE2 floating point registers.
+def FR32 : RegisterClass<"X86", [f32], 32,
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11,
+ XMM12, XMM13, XMM14, XMM15]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ FR32Class::iterator
+ FR32Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
+}
+
+def FR64 : RegisterClass<"X86", [f64], 64,
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11,
+ XMM12, XMM13, XMM14, XMM15]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ FR64Class::iterator
+ FR64Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
+}
+
+
+// FIXME: This sets up the floating point register files as though they are f64
+// values, though they really are f80 values. This will cause us to spill
+// values as 64-bit quantities instead of 80-bit quantities, which is much much
+// faster on common hardware. In reality, this should be controlled by a
+// command line option or something.
+
+def RFP32 : RegisterClass<"X86",[f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP64 : RegisterClass<"X86",[f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP80 : RegisterClass<"X86",[f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+
+// Floating point stack registers (these are not allocatable by the
+// register allocator - the floating point stackifier is responsible
+// for transforming FPn allocations to STn registers)
+def RST : RegisterClass<"X86", [f80, f64, f32], 32,
+ [ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ RSTClass::iterator
+ RSTClass::allocation_order_end(const MachineFunction &MF) const {
+ return begin();
+ }
+ }];
+}
+
+// Generic vector registers: VR64 and VR128.
+def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+ [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
+def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11,
+ XMM12, XMM13, XMM14, XMM15]> {
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ VR128Class::iterator
+ VR128Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
+}
+
+// Status flags registers.
+def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+}
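A small sketch, not part of the diff, of how the allocation_order_begin/end bodies defined
above are consumed by allocator-side code; the X86::GR32RegisterClass handle is the
TableGen-generated singleton and is assumed here for illustration:

  const TargetRegisterClass *RC = X86::GR32RegisterClass;
  for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
                                     E = RC->allocation_order_end(MF);
       I != E; ++I) {
    unsigned PhysReg = *I;  // Candidates appear in preferred allocation order,
                            // with ESP/EBP already filtered out as appropriate.
  }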
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
new file mode 100644
index 000000000000..b225f480e4ed
--- /dev/null
+++ b/lib/Target/X86/X86Relocations.h
@@ -0,0 +1,42 @@
+//===- X86Relocations.h - X86 Code Relocations ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86RELOCATIONS_H
+#define X86RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace X86 {
+ /// RelocationType - An enum for the x86 relocation codes. Note that
+ /// the terminology here doesn't follow x86 convention - word means
+ /// 32-bit and dword means 64-bit.
+ enum RelocationType {
+ // reloc_pcrel_word - PC relative relocation, add the relocated value to
+ // the value already in memory, after we adjust it for where the PC is.
+ reloc_pcrel_word = 0,
+
+ // reloc_picrel_word - PIC base relative relocation, add the relocated
+ // value to the value already in memory, after we adjust it for where the
+ // PIC base is.
+ reloc_picrel_word = 1,
+
+ // reloc_absolute_word, reloc_absolute_dword - Absolute relocation, just
+ // add the relocated value to the value already in memory.
+ reloc_absolute_word = 2,
+ reloc_absolute_dword = 3
+ };
+ }
+}
+
+#endif
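As a hedged illustration of the reloc_pcrel_word semantics described above (the function
name here is hypothetical; the real fixup logic lives in the JIT relocation resolver):

  static void ResolvePCRelWord(unsigned char *RelocPos, intptr_t ResultPtr) {
    // Adjust for where the PC is: the displacement is relative to the end of
    // the 4-byte field, then add into the value already in memory.
    intptr_t PCRel = ResultPtr - (intptr_t)RelocPos - 4;
    *((unsigned *)RelocPos) += (unsigned)PCRel;
  }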
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
new file mode 100644
index 000000000000..03ce1aee0e8a
--- /dev/null
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -0,0 +1,446 @@
+//===-- X86Subtarget.cpp - X86 Subtarget Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the X86 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "subtarget"
+#include "X86Subtarget.h"
+#include "X86GenSubtarget.inc"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#if defined(_MSC_VER)
+ #include <intrin.h>
+#endif
+
+static cl::opt<X86Subtarget::AsmWriterFlavorTy>
+AsmWriterFlavor("x86-asm-syntax", cl::init(X86Subtarget::Unset),
+ cl::desc("Choose style of code to emit from X86 backend:"),
+ cl::values(
+ clEnumValN(X86Subtarget::ATT, "att", "Emit AT&T-style assembly"),
+ clEnumValN(X86Subtarget::Intel, "intel", "Emit Intel-style assembly"),
+ clEnumValEnd));
+
+
+/// True if accessing the GV requires an extra load. For Windows, dllimported
+/// symbols are indirect, loading the value at address GV rather than the
+/// value of GV itself. This means that the GlobalAddress must be in the base
+/// or index register of the address, not the GV offset field.
+bool X86Subtarget::GVRequiresExtraLoad(const GlobalValue* GV,
+ const TargetMachine& TM,
+ bool isDirectCall) const
+{
+ // FIXME: PIC
+ if (TM.getRelocationModel() != Reloc::Static &&
+ TM.getCodeModel() != CodeModel::Large) {
+ if (isTargetDarwin()) {
+ if (isDirectCall)
+ return false;
+ bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode();
+ if (GV->hasHiddenVisibility() &&
+ (Is64Bit || (!isDecl && !GV->hasCommonLinkage())))
+ // If symbol visibility is hidden, the extra load is not needed if
+ // target is x86-64 or the symbol is definitely defined in the current
+ // translation unit.
+ return false;
+ return !isDirectCall && (isDecl || GV->isWeakForLinker());
+ } else if (isTargetELF()) {
+ // An extra load is needed for all externally visible symbols.
+ if (isDirectCall)
+ return false;
+ if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+ return false;
+ return true;
+ } else if (isTargetCygMing() || isTargetWindows()) {
+ return (GV->hasDLLImportLinkage());
+ }
+ }
+ return false;
+}
+
+/// True if accessing the GV requires a register. This is a superset of the
+/// cases where GVRequiresExtraLoad is true. Some variations of PIC require
+/// a register, but not an extra load.
+bool X86Subtarget::GVRequiresRegister(const GlobalValue *GV,
+ const TargetMachine& TM,
+ bool isDirectCall) const
+{
+ if (GVRequiresExtraLoad(GV, TM, isDirectCall))
+ return true;
+ // Code below here need only consider cases where GVRequiresExtraLoad
+ // returns false.
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return !isDirectCall &&
+ (GV->hasLocalLinkage() || GV->hasExternalLinkage());
+ return false;
+}
+
+/// getBZeroEntry - This function returns the name of a function which has an
+/// interface like the non-standard bzero function, if such a function exists on
+/// the current subtarget and it is considered preferable over memset with zero
+/// passed as the second argument. Otherwise it returns null.
+const char *X86Subtarget::getBZeroEntry() const {
+ // Darwin 10 has a __bzero entry point for this purpose.
+ if (getDarwinVers() >= 10)
+ return "__bzero";
+
+ return 0;
+}
+
+/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+/// to immediate address.
+bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
+ if (Is64Bit)
+ return false;
+ return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
+}
+
+/// getSpecialAddressLatency - For targets where it is beneficial to
+/// backschedule instructions that compute addresses, return a value
+/// indicating the number of scheduling cycles of backscheduling that
+/// should be attempted.
+unsigned X86Subtarget::getSpecialAddressLatency() const {
+ // For x86 out-of-order targets, back-schedule address computations so
+ // that loads and stores aren't blocked.
+ // This value was chosen arbitrarily.
+ return 200;
+}
+
+/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+ unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64)
+ #if defined(__GNUC__)
+ // gcc doesn't know that cpuid would clobber ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #endif
+#endif
+ return true;
+}
+
+static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
+
+void X86Subtarget::AutoDetectSubtargetFeatures() {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ if (X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+ return;
+
+ X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+
+ if ((EDX >> 23) & 0x1) X86SSELevel = MMX;
+ if ((EDX >> 25) & 0x1) X86SSELevel = SSE1;
+ if ((EDX >> 26) & 0x1) X86SSELevel = SSE2;
+ if (ECX & 0x1) X86SSELevel = SSE3;
+ if ((ECX >> 9) & 0x1) X86SSELevel = SSSE3;
+ if ((ECX >> 19) & 0x1) X86SSELevel = SSE41;
+ if ((ECX >> 20) & 0x1) X86SSELevel = SSE42;
+
+ bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
+ bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
+ if (IsIntel || IsAMD) {
+ // Determine if bit test memory instructions are slow.
+ unsigned Family = 0;
+ unsigned Model = 0;
+ DetectFamilyModel(EAX, Family, Model);
+ IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+
+ X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ HasX86_64 = (EDX >> 29) & 0x1;
+ HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
+ }
+}
+
+static const char *GetCurrentX86CPU() {
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
+ return "generic";
+ unsigned Family = 0;
+ unsigned Model = 0;
+ DetectFamilyModel(EAX, Family, Model);
+
+ X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ bool Em64T = (EDX >> 29) & 0x1;
+ bool HasSSE3 = (ECX & 0x1);
+
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
+ if (memcmp(text.c, "GenuineIntel", 12) == 0) {
+ switch (Family) {
+ case 3:
+ return "i386";
+ case 4:
+ return "i486";
+ case 5:
+ switch (Model) {
+ case 4: return "pentium-mmx";
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 1: return "pentiumpro";
+ case 3:
+ case 5:
+ case 6: return "pentium2";
+ case 7:
+ case 8:
+ case 10:
+ case 11: return "pentium3";
+ case 9:
+ case 13: return "pentium-m";
+ case 14: return "yonah";
+ case 15:
+ case 22: // Celeron M 540
+ return "core2";
+ case 23: // 45nm: Penryn, Wolfdale, Yorkfield (XE)
+ return "penryn";
+ default: return "i686";
+ }
+ case 15: {
+ switch (Model) {
+ case 3:
+ case 4:
+ case 6: // same as 4, but 65nm
+ return (Em64T) ? "nocona" : "prescott";
+ case 26:
+ return "corei7";
+ case 28:
+ return "atom";
+ default:
+ return (Em64T) ? "x86-64" : "pentium4";
+ }
+ }
+
+ default:
+ return "generic";
+ }
+ } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) {
+ // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
+ // appears to be no way to generate the wide variety of AMD-specific targets
+ // from the information returned from CPUID.
+ switch (Family) {
+ case 4:
+ return "i486";
+ case 5:
+ switch (Model) {
+ case 6:
+ case 7: return "k6";
+ case 8: return "k6-2";
+ case 9:
+ case 13: return "k6-3";
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 4: return "athlon-tbird";
+ case 6:
+ case 7:
+ case 8: return "athlon-mp";
+ case 10: return "athlon-xp";
+ default: return "athlon";
+ }
+ case 15:
+ if (HasSSE3) {
+ switch (Model) {
+ default: return "k8-sse3";
+ }
+ } else {
+ switch (Model) {
+ case 1: return "opteron";
+ case 5: return "athlon-fx"; // also opteron
+ default: return "athlon64";
+ }
+ }
+ case 16:
+ switch (Model) {
+ default: return "amdfam10";
+ }
+ default:
+ return "generic";
+ }
+ } else {
+ return "generic";
+ }
+}
+
+X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
+ : AsmFlavor(AsmWriterFlavor)
+ , PICStyle(PICStyles::None)
+ , X86SSELevel(NoMMXSSE)
+ , X863DNowLevel(NoThreeDNow)
+ , HasX86_64(false)
+ , IsBTMemSlow(false)
+ , DarwinVers(0)
+ , IsLinux(false)
+ , stackAlignment(8)
+ // FIXME: this is a known good value for Yonah. How about others?
+ , MaxInlineSizeThreshold(128)
+ , Is64Bit(is64Bit)
+ , TargetType(isELF) { // Default to ELF unless otherwise specified.
+
+ // Determine default and user specified characteristics
+ if (!FS.empty()) {
+ // If feature string is not empty, parse features string.
+ std::string CPU = GetCurrentX86CPU();
+ ParseSubtargetFeatures(FS, CPU);
+ // All X86-64 CPUs also have SSE2; however, the user might request no SSE via
+ // -mattr, so don't force SSELevel here.
+ } else {
+ // Otherwise, use CPUID to auto-detect feature set.
+ AutoDetectSubtargetFeatures();
+ // Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
+ if (Is64Bit && X86SSELevel < SSE2)
+ X86SSELevel = SSE2;
+ }
+
+ // If requesting codegen for X86-64, make sure that 64-bit features
+ // are enabled.
+ if (Is64Bit)
+ HasX86_64 = true;
+
+ DOUT << "Subtarget features: SSELevel " << X86SSELevel
+ << ", 3DNowLevel " << X863DNowLevel
+ << ", 64bit " << HasX86_64 << "\n";
+ assert((!Is64Bit || HasX86_64) &&
+ "64-bit code requested on a subtarget that doesn't support it!");
+
+ // Set the boolean corresponding to the current target triple, or the default
+ // if one cannot be determined, to true.
+ const std::string& TT = M.getTargetTriple();
+ if (TT.length() > 5) {
+ size_t Pos;
+ if ((Pos = TT.find("-darwin")) != std::string::npos) {
+ TargetType = isDarwin;
+
+ // Compute the darwin version number.
+ if (isdigit(TT[Pos+7]))
+ DarwinVers = atoi(&TT[Pos+7]);
+ else
+ DarwinVers = 8; // Minimum supported darwin is Tiger.
+ } else if (TT.find("linux") != std::string::npos) {
+ // Linux doesn't imply ELF, but we don't currently support anything else.
+ TargetType = isELF;
+ IsLinux = true;
+ } else if (TT.find("cygwin") != std::string::npos) {
+ TargetType = isCygwin;
+ } else if (TT.find("mingw") != std::string::npos) {
+ TargetType = isMingw;
+ } else if (TT.find("win32") != std::string::npos) {
+ TargetType = isWindows;
+ } else if (TT.find("windows") != std::string::npos) {
+ TargetType = isWindows;
+ } else if (TT.find("-cl") != std::string::npos) {
+ TargetType = isDarwin;
+ DarwinVers = 9;
+ }
+ } else if (TT.empty()) {
+#if defined(__CYGWIN__)
+ TargetType = isCygwin;
+#elif defined(__MINGW32__) || defined(__MINGW64__)
+ TargetType = isMingw;
+#elif defined(__APPLE__)
+ TargetType = isDarwin;
+#if __APPLE_CC__ > 5400
+ DarwinVers = 9; // GCC 5400+ is Leopard.
+#else
+ DarwinVers = 8; // Minimum supported darwin is Tiger.
+#endif
+
+#elif defined(_WIN32) || defined(_WIN64)
+ TargetType = isWindows;
+#elif defined(__linux__)
+ // Linux doesn't imply ELF, but we don't currently support anything else.
+ TargetType = isELF;
+ IsLinux = true;
+#endif
+ }
+
+ // If the asm syntax hasn't been overridden on the command line, use whatever
+ // the target wants.
+ if (AsmFlavor == X86Subtarget::Unset) {
+ AsmFlavor = (TargetType == isWindows)
+ ? X86Subtarget::Intel : X86Subtarget::ATT;
+ }
+
+ // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64
+ // bit targets.
+ if (TargetType == isDarwin || Is64Bit)
+ stackAlignment = 16;
+
+ if (StackAlignment)
+ stackAlignment = StackAlignment;
+}
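A worked example of the family/model decode in DetectFamilyModel above, using
EAX = 0x000106A5 (an illustrative Nehalem CPUID signature):

  Family = (0x000106A5 >> 8) & 0xf                          = 6
  Model  = (0x000106A5 >> 4) & 0xf                          = 10
  since Family == 6: Model += ((0x000106A5 >> 16) & 0xf) << 4, so Model = 26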
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
new file mode 100644
index 000000000000..46476f20400a
--- /dev/null
+++ b/lib/Target/X86/X86Subtarget.h
@@ -0,0 +1,224 @@
+//=====---- X86Subtarget.h - Define Subtarget for the X86 -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86SUBTARGET_H
+#define X86SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include <string>
+
+namespace llvm {
+class Module;
+class GlobalValue;
+class TargetMachine;
+
+namespace PICStyles {
+enum Style {
+ Stub, GOT, RIPRel, WinPIC, None
+};
+}
+
+class X86Subtarget : public TargetSubtarget {
+public:
+ enum AsmWriterFlavorTy {
+ // Note: This numbering has to match the GCC assembler dialects for inline
+ // asm alternatives to work right.
+ ATT = 0, Intel = 1, Unset
+ };
+protected:
+ enum X86SSEEnum {
+ NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42
+ };
+
+ enum X863DNowEnum {
+ NoThreeDNow, ThreeDNow, ThreeDNowA
+ };
+
+ /// AsmFlavor - Which x86 asm dialect to use.
+ ///
+ AsmWriterFlavorTy AsmFlavor;
+
+ /// PICStyle - Which PIC style to use
+ ///
+ PICStyles::Style PICStyle;
+
+ /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or
+ /// none supported.
+ X86SSEEnum X86SSELevel;
+
+ /// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported.
+ ///
+ X863DNowEnum X863DNowLevel;
+
+ /// HasX86_64 - True if the processor supports X86-64 instructions.
+ ///
+ bool HasX86_64;
+
+ /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
+ bool IsBTMemSlow;
+
+ /// HasSSE4A - True if the processor supports SSE4A instructions.
+ bool HasSSE4A;
+
+ /// DarwinVers - Nonzero if this is a darwin platform: the numeric
+ /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
+ unsigned char DarwinVers; // Is any darwin-x86 platform.
+
+ /// isLinux - true if this is a "linux" platform.
+ bool IsLinux;
+
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned stackAlignment;
+
+ /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
+ ///
+ unsigned MaxInlineSizeThreshold;
+
+private:
+ /// Is64Bit - True if the processor supports 64-bit instructions and module
+ /// pointer size is 64 bit.
+ bool Is64Bit;
+
+public:
+ enum {
+ isELF, isCygwin, isDarwin, isWindows, isMingw
+ } TargetType;
+
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ X86Subtarget(const Module &M, const std::string &FS, bool is64Bit);
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID
+ /// instruction.
+ void AutoDetectSubtargetFeatures();
+
+ bool is64Bit() const { return Is64Bit; }
+
+ PICStyles::Style getPICStyle() const { return PICStyle; }
+ void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
+
+ bool hasMMX() const { return X86SSELevel >= MMX; }
+ bool hasSSE1() const { return X86SSELevel >= SSE1; }
+ bool hasSSE2() const { return X86SSELevel >= SSE2; }
+ bool hasSSE3() const { return X86SSELevel >= SSE3; }
+ bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
+ bool hasSSE41() const { return X86SSELevel >= SSE41; }
+ bool hasSSE42() const { return X86SSELevel >= SSE42; }
+ bool hasSSE4A() const { return HasSSE4A; }
+ bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
+ bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+
+ bool isBTMemSlow() const { return IsBTMemSlow; }
+
+ unsigned getAsmFlavor() const {
+ return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
+ }
+
+ bool isFlavorAtt() const { return AsmFlavor == ATT; }
+ bool isFlavorIntel() const { return AsmFlavor == Intel; }
+
+ bool isTargetDarwin() const { return TargetType == isDarwin; }
+ bool isTargetELF() const {
+ return TargetType == isELF;
+ }
+ bool isTargetWindows() const { return TargetType == isWindows; }
+ bool isTargetMingw() const { return TargetType == isMingw; }
+ bool isTargetCygMing() const { return (TargetType == isMingw ||
+ TargetType == isCygwin); }
+ bool isTargetCygwin() const { return TargetType == isCygwin; }
+ bool isTargetWin64() const {
+ return (Is64Bit && (TargetType == isMingw || TargetType == isWindows));
+ }
+
+ std::string getDataLayout() const {
+ const char *p;
+ if (is64Bit())
+ p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128";
+ else {
+ if (isTargetDarwin())
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128";
+ else
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32";
+ }
+ return std::string(p);
+ }
+
+ bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
+ bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
+ bool isPICStyleStub() const { return PICStyle == PICStyles::Stub; }
+ bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
+ bool isPICStyleWinPIC() const { return PICStyle == PICStyles::WinPIC; }
+
+ /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
+ unsigned getDarwinVers() const { return DarwinVers; }
+
+ /// isLinux - Return true if the target is "Linux".
+ bool isLinux() const { return IsLinux; }
+
+ /// True if accessing the GV requires an extra load. For Windows, dllimported
+ /// symbols are indirect, loading the value at address GV rather than the
+ /// value of GV itself. This means that the GlobalAddress must be in the base
+ /// or index register of the address, not the GV offset field.
+ bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
+ bool isDirectCall) const;
+
+ /// True if accessing the GV requires a register. This is a superset of the
+ /// cases where GVRequiresExtraLoad is true. Some variations of PIC require
+ /// a register, but not an extra load.
+ bool GVRequiresRegister(const GlobalValue* GV, const TargetMachine& TM,
+ bool isDirectCall) const;
+
+ /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
+ /// to immediate address.
+ bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const;
+
+ /// This function returns the name of a function which has an interface
+ /// like the non-standard bzero function, if such a function exists on
+ /// the current subtarget and it is considered preferable over
+ /// memset with zero passed as the second argument. Otherwise it
+ /// returns null.
+ const char *getBZeroEntry() const;
+
+ /// getSpecialAddressLatency - For targets where it is beneficial to
+ /// backschedule instructions that compute addresses, return a value
+ /// indicating the number of scheduling cycles of backscheduling that
+ /// should be attempted.
+ unsigned getSpecialAddressLatency() const;
+};
+
+namespace X86 {
+ /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+ /// the specified arguments. If we can't run cpuid on the host, return true.
+ bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
+ unsigned *rECX, unsigned *rEDX);
+}
+
+} // End llvm namespace
+
+#endif
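A minimal usage sketch for the GetCpuIDAndInfo helper declared above, mirroring the feature
probe in AutoDetectSubtargetFeatures (illustrative only, not part of the diff):

  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
  if (!X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) {
    bool HasSSE2 = (EDX >> 26) & 0x1;  // EDX bit 26: SSE2 support
    bool HasSSE3 = ECX & 0x1;          // ECX bit 0:  SSE3 support
  }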
diff --git a/lib/Target/X86/X86TargetAsmInfo.cpp b/lib/Target/X86/X86TargetAsmInfo.cpp
new file mode 100644
index 000000000000..5dda5f4e49b0
--- /dev/null
+++ b/lib/Target/X86/X86TargetAsmInfo.cpp
@@ -0,0 +1,461 @@
+//===-- X86TargetAsmInfo.cpp - X86 asm properties ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the X86TargetAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetAsmInfo.h"
+#include "X86TargetMachine.h"
+#include "X86Subtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+const char *const llvm::x86_asm_table[] = {
+ "{si}", "S",
+ "{di}", "D",
+ "{ax}", "a",
+ "{cx}", "c",
+ "{memory}", "memory",
+ "{flags}", "",
+ "{dirflag}", "",
+ "{fpsr}", "",
+ "{cc}", "cc",
+ 0,0};
+
+X86DarwinTargetAsmInfo::X86DarwinTargetAsmInfo(const X86TargetMachine &TM):
+ X86TargetAsmInfo<DarwinTargetAsmInfo>(TM) {
+ const X86Subtarget* Subtarget = &TM.getSubtarget<X86Subtarget>();
+ bool is64Bit = Subtarget->is64Bit();
+
+ AlignmentIsInBytes = false;
+ TextAlignFillValue = 0x90;
+ GlobalPrefix = "_";
+ if (!is64Bit)
+ Data64bitsDirective = 0; // we can't emit a 64-bit unit
+ ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
+ PrivateGlobalPrefix = "L"; // Marker for constant pool idxs
+ LessPrivateGlobalPrefix = "l"; // Marker for some ObjC metadata
+ BSSSection = 0; // no BSS section.
+ ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill
+ if (TM.getRelocationModel() != Reloc::Static)
+ ConstantPoolSection = "\t.const_data";
+ else
+ ConstantPoolSection = "\t.const\n";
+ JumpTableDataSection = "\t.const\n";
+ CStringSection = "\t.cstring";
+ // FIXME: Why don't we always use this section?
+ if (is64Bit) {
+ SixteenByteConstantSection = getUnnamedSection("\t.literal16\n",
+ SectionFlags::Mergeable);
+ }
+ LCOMMDirective = "\t.lcomm\t";
+ SwitchToSectionDirective = "\t.section ";
+ StringConstantPrefix = "\1LC";
+ // Leopard and above support aligned common symbols.
+ COMMDirectiveTakesAlignment = (Subtarget->getDarwinVers() >= 9);
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+ NonLocalEHFrameLabel = true;
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorsSection = ".constructor";
+ StaticDtorsSection = ".destructor";
+ } else {
+ StaticCtorsSection = ".mod_init_func";
+ StaticDtorsSection = ".mod_term_func";
+ }
+ if (is64Bit) {
+ PersonalityPrefix = "";
+ PersonalitySuffix = "+4@GOTPCREL";
+ } else {
+ PersonalityPrefix = "L";
+ PersonalitySuffix = "$non_lazy_ptr";
+ }
+ NeedsIndirectEncoding = true;
+ InlineAsmStart = "## InlineAsm Start";
+ InlineAsmEnd = "## InlineAsm End";
+ CommentString = "##";
+ SetDirective = "\t.set";
+ PCSymbol = ".";
+ UsedDirective = "\t.no_dead_strip\t";
+ WeakDefDirective = "\t.weak_definition ";
+ WeakRefDirective = "\t.weak_reference ";
+ HiddenDirective = "\t.private_extern ";
+ ProtectedDirective = "\t.globl\t";
+
+ // In non-PIC modes, emit a special label before jump tables so that the
+ // linker can perform more accurate dead code stripping.
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ // Emit a local label that is preserved until the linker runs.
+ JumpTableSpecialLabelPrefix = "l";
+ }
+
+ SupportsDebugInformation = true;
+ NeedsSet = true;
+ DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+ DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+ DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+ DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+ DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+ DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+ DwarfDebugInlineSection = ".section __DWARF,__debug_inlined,regular,debug";
+ DwarfUsesInlineInfoSection = true;
+ DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+ DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+ DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+ DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+ DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+
+ // Exception handling
+ SupportsExceptionHandling = true;
+ GlobalEHDirective = "\t.globl\t";
+ SupportsWeakOmittedEHFrame = false;
+ AbsoluteEHSectionOffsets = false;
+ DwarfEHFrameSection =
+ ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support";
+ DwarfExceptionSection = ".section __DATA,__gcc_except_tab";
+}
+
+unsigned
+X86DarwinTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ if (Reason == DwarfEncoding::Functions && Global)
+ return (DW_EH_PE_pcrel | DW_EH_PE_indirect | DW_EH_PE_sdata4);
+ else if (Reason == DwarfEncoding::CodeLabels || !Global)
+ return DW_EH_PE_pcrel;
+ else
+ return DW_EH_PE_absptr;
+}
+
+const char *
+X86DarwinTargetAsmInfo::getEHGlobalPrefix() const
+{
+ const X86Subtarget* Subtarget = &TM.getSubtarget<X86Subtarget>();
+ if (Subtarget->getDarwinVers() > 9)
+ return PrivateGlobalPrefix;
+ else
+ return "";
+}
+
+X86ELFTargetAsmInfo::X86ELFTargetAsmInfo(const X86TargetMachine &TM):
+ X86TargetAsmInfo<ELFTargetAsmInfo>(TM) {
+
+ CStringSection = ".rodata.str";
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+ PCSymbol = ".";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+
+ // Debug Information
+ AbsoluteDebugSectionOffsets = true;
+ SupportsDebugInformation = true;
+ DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",@progbits";
+ DwarfInfoSection = "\t.section\t.debug_info,\"\",@progbits";
+ DwarfLineSection = "\t.section\t.debug_line,\"\",@progbits";
+ DwarfFrameSection = "\t.section\t.debug_frame,\"\",@progbits";
+ DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"\",@progbits";
+ DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"\",@progbits";
+ DwarfStrSection = "\t.section\t.debug_str,\"\",@progbits";
+ DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
+ DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
+ DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
+ DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+
+ // Exception handling
+ SupportsExceptionHandling = true;
+ AbsoluteEHSectionOffsets = false;
+ DwarfEHFrameSection = "\t.section\t.eh_frame,\"aw\",@progbits";
+ DwarfExceptionSection = "\t.section\t.gcc_except_table,\"a\",@progbits";
+
+ // On Linux we must declare when we can use a non-executable stack.
+ if (TM.getSubtarget<X86Subtarget>().isLinux())
+ NonexecutableStackDirective = "\t.section\t.note.GNU-stack,\"\",@progbits";
+}
+
+unsigned
+X86ELFTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ CodeModel::Model CM = TM.getCodeModel();
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ unsigned Format = 0;
+
+ if (!is64Bit)
+ // 32 bit targets always encode pointers as 4 bytes
+ Format = DW_EH_PE_sdata4;
+ else {
+ // 64 bit targets encode pointers in 4 bytes iff:
+ // - code model is small OR
+ // - code model is medium and we're emitting externally visible symbols
+ // or any code symbols
+ if (CM == CodeModel::Small ||
+ (CM == CodeModel::Medium && (Global ||
+ Reason != DwarfEncoding::Data)))
+ Format = DW_EH_PE_sdata4;
+ else
+ Format = DW_EH_PE_sdata8;
+ }
+
+ if (Global)
+ Format |= DW_EH_PE_indirect;
+
+ return (Format | DW_EH_PE_pcrel);
+ } else {
+ if (is64Bit &&
+ (CM == CodeModel::Small ||
+ (CM == CodeModel::Medium && Reason != DwarfEncoding::Data)))
+ return DW_EH_PE_udata4;
+ else
+ return DW_EH_PE_absptr;
+ }
+}
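
As a concrete reading of the table above: a 32-bit PIC target describing a global symbol ends up with an indirect, pc-relative, 4-byte signed encoding, while a statically relocated 64-bit target with the small code model uses a plain 4-byte offset. The sketch below spells out the resulting numeric values using the DW_EH_PE_* constants from llvm/Support/Dwarf.h; it is a sanity check, not backend code.

#include "llvm/Support/Dwarf.h"
#include <cassert>
using namespace llvm::dwarf;

// Worked values for two common cases of the encoding choice above.
void checkPreferredEHEncodings() {
  // PIC, 32-bit, global symbol: indirect | pcrel | sdata4.
  assert((DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4) == 0x9b);
  // Static relocation model, 64-bit, small code model: udata4.
  assert(DW_EH_PE_udata4 == 0x03);
  // Everything else falls back to an absolute pointer.
  assert(DW_EH_PE_absptr == 0x00);
}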
+
+X86COFFTargetAsmInfo::X86COFFTargetAsmInfo(const X86TargetMachine &TM):
+ X86GenericTargetAsmInfo(TM) {
+
+ GlobalPrefix = "_";
+ LCOMMDirective = "\t.lcomm\t";
+ COMMDirectiveTakesAlignment = false;
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+ StaticCtorsSection = "\t.section .ctors,\"aw\"";
+ StaticDtorsSection = "\t.section .dtors,\"aw\"";
+ HiddenDirective = NULL;
+ PrivateGlobalPrefix = "L"; // Prefix for private global symbols
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+ AbsoluteDebugSectionOffsets = true;
+ AbsoluteEHSectionOffsets = false;
+ SupportsDebugInformation = true;
+ DwarfSectionOffsetDirective = "\t.secrel32\t";
+ DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"dr\"";
+ DwarfInfoSection = "\t.section\t.debug_info,\"dr\"";
+ DwarfLineSection = "\t.section\t.debug_line,\"dr\"";
+ DwarfFrameSection = "\t.section\t.debug_frame,\"dr\"";
+ DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"dr\"";
+ DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"dr\"";
+ DwarfStrSection = "\t.section\t.debug_str,\"dr\"";
+ DwarfLocSection = "\t.section\t.debug_loc,\"dr\"";
+ DwarfARangesSection = "\t.section\t.debug_aranges,\"dr\"";
+ DwarfRangesSection = "\t.section\t.debug_ranges,\"dr\"";
+ DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"dr\"";
+}
+
+unsigned
+X86COFFTargetAsmInfo::PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const {
+ CodeModel::Model CM = TM.getCodeModel();
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ unsigned Format = 0;
+
+ if (!is64Bit)
+ // 32 bit targets always encode pointers as 4 bytes
+ Format = DW_EH_PE_sdata4;
+ else {
+ // 64 bit targets encode pointers in 4 bytes iff:
+ // - code model is small OR
+ // - code model is medium and we're emitting externally visible symbols
+ // or any code symbols
+ if (CM == CodeModel::Small ||
+ (CM == CodeModel::Medium && (Global ||
+ Reason != DwarfEncoding::Data)))
+ Format = DW_EH_PE_sdata4;
+ else
+ Format = DW_EH_PE_sdata8;
+ }
+
+ if (Global)
+ Format |= DW_EH_PE_indirect;
+
+ return (Format | DW_EH_PE_pcrel);
+ } else {
+ if (is64Bit &&
+ (CM == CodeModel::Small ||
+ (CM == CodeModel::Medium && Reason != DwarfEncoding::Data)))
+ return DW_EH_PE_udata4;
+ else
+ return DW_EH_PE_absptr;
+ }
+}
+
+std::string
+X86COFFTargetAsmInfo::UniqueSectionForGlobal(const GlobalValue* GV,
+ SectionKind::Kind kind) const {
+ switch (kind) {
+ case SectionKind::Text:
+ return ".text$linkonce" + GV->getName();
+ case SectionKind::Data:
+ case SectionKind::BSS:
+ case SectionKind::ThreadData:
+ case SectionKind::ThreadBSS:
+ return ".data$linkonce" + GV->getName();
+ case SectionKind::ROData:
+ case SectionKind::RODataMergeConst:
+ case SectionKind::RODataMergeStr:
+ return ".rdata$linkonce" + GV->getName();
+ default:
+ assert(0 && "Unknown section kind");
+ }
+ return NULL;
+}
+
+std::string X86COFFTargetAsmInfo::printSectionFlags(unsigned flags) const {
+ std::string Flags = ",\"";
+
+ if (flags & SectionFlags::Code)
+ Flags += 'x';
+ if (flags & SectionFlags::Writeable)
+ Flags += 'w';
+
+ Flags += "\"";
+
+ return Flags;
+}
+
+X86WinTargetAsmInfo::X86WinTargetAsmInfo(const X86TargetMachine &TM):
+ X86GenericTargetAsmInfo(TM) {
+ GlobalPrefix = "_";
+ CommentString = ";";
+
+ PrivateGlobalPrefix = "$";
+ AlignDirective = "\talign\t";
+ ZeroDirective = "\tdb\t";
+ ZeroDirectiveSuffix = " dup(0)";
+ AsciiDirective = "\tdb\t";
+ AscizDirective = 0;
+ Data8bitsDirective = "\tdb\t";
+ Data16bitsDirective = "\tdw\t";
+ Data32bitsDirective = "\tdd\t";
+ Data64bitsDirective = "\tdq\t";
+ HasDotTypeDotSizeDirective = false;
+ HasSingleParameterDotFile = false;
+
+ TextSection = getUnnamedSection("_text", SectionFlags::Code);
+ DataSection = getUnnamedSection("_data", SectionFlags::Writeable);
+
+ JumpTableDataSection = NULL;
+ SwitchToSectionDirective = "";
+ TextSectionStartSuffix = "\tsegment 'CODE'";
+ DataSectionStartSuffix = "\tsegment 'DATA'";
+ SectionEndDirectiveSuffix = "\tends\n";
+}
+
+template <class BaseTAI>
+bool X86TargetAsmInfo<BaseTAI>::LowerToBSwap(CallInst *CI) const {
+ // FIXME: this should verify that we are targeting a 486 or better. If not,
+ // we will turn this bswap into something that will be lowered to logical ops
+ // instead of emitting the bswap asm. For now, we don't support 486 or lower
+ // so don't worry about this.
+
+ // Verify this is a simple bswap.
+ if (CI->getNumOperands() != 2 ||
+ CI->getType() != CI->getOperand(1)->getType() ||
+ !CI->getType()->isInteger())
+ return false;
+
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+
+ // Okay, we can do this xform, do so now.
+ const Type *Tys[] = { Ty };
+ Module *M = CI->getParent()->getParent()->getParent();
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+
+ Value *Op = CI->getOperand(1);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
+
+template <class BaseTAI>
+bool X86TargetAsmInfo<BaseTAI>::ExpandInlineAsm(CallInst *CI) const {
+ InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
+ std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
+
+ std::string AsmStr = IA->getAsmString();
+
+ // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
+ std::vector<std::string> AsmPieces;
+ SplitString(AsmStr, AsmPieces, "\n"); // ; as separator?
+
+ switch (AsmPieces.size()) {
+ default: return false;
+ case 1:
+ AsmStr = AsmPieces[0];
+ AsmPieces.clear();
+ SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
+
+ // bswap $0
+ if (AsmPieces.size() == 2 &&
+ (AsmPieces[0] == "bswap" ||
+ AsmPieces[0] == "bswapq" ||
+ AsmPieces[0] == "bswapl") &&
+ (AsmPieces[1] == "$0" ||
+ AsmPieces[1] == "${0:q}")) {
+ // No need to check constraints, nothing other than the equivalent of
+ // "=r,0" would be valid here.
+ return LowerToBSwap(CI);
+ }
+ // rorw $$8, ${0:w} --> llvm.bswap.i16
+ if (CI->getType() == Type::Int16Ty &&
+ AsmPieces.size() == 3 &&
+ AsmPieces[0] == "rorw" &&
+ AsmPieces[1] == "$$8," &&
+ AsmPieces[2] == "${0:w}" &&
+ IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") {
+ return LowerToBSwap(CI);
+ }
+ break;
+ case 3:
+ if (CI->getType() == Type::Int64Ty && Constraints.size() >= 2 &&
+ Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
+ Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
+ // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
+ std::vector<std::string> Words;
+ SplitString(AsmPieces[0], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
+ Words.clear();
+ SplitString(AsmPieces[1], Words, " \t");
+ if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
+ Words.clear();
+ SplitString(AsmPieces[2], Words, " \t,");
+ if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
+ Words[2] == "%edx") {
+ return LowerToBSwap(CI);
+ }
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
+
+// Instantiate default implementation.
+TEMPLATE_INSTANTIATION(class X86TargetAsmInfo<TargetAsmInfo>);
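
For reference, the pattern ExpandInlineAsm recognizes is a lone byte swap written as inline assembly. A hedged example of user code that would be rewritten to the llvm.bswap intrinsic is below; at the IR level the GCC-style "%0" becomes "$0" with a constraint string equivalent to "=r,0", which is exactly what the matcher checks. The rorw and three-instruction i64 forms are matched in the same spirit.

// Illustrative user code, not part of the backend: with this hook enabled the
// asm below is replaced by a call to llvm.bswap.i32 instead of being emitted
// verbatim, so the optimizer can see through it.
static inline unsigned swap32(unsigned x) {
  __asm__("bswapl %0" : "=r"(x) : "0"(x));
  return x;
}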
diff --git a/lib/Target/X86/X86TargetAsmInfo.h b/lib/Target/X86/X86TargetAsmInfo.h
new file mode 100644
index 000000000000..f89171d3da71
--- /dev/null
+++ b/lib/Target/X86/X86TargetAsmInfo.h
@@ -0,0 +1,75 @@
+//=====-- X86TargetAsmInfo.h - X86 asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the X86TargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETASMINFO_H
+#define X86TARGETASMINFO_H
+
+#include "X86TargetMachine.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/ELFTargetAsmInfo.h"
+#include "llvm/Target/DarwinTargetAsmInfo.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+ extern const char *const x86_asm_table[];
+
+ template <class BaseTAI>
+ struct X86TargetAsmInfo : public BaseTAI {
+ explicit X86TargetAsmInfo(const X86TargetMachine &TM):
+ BaseTAI(TM) {
+ const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+
+ BaseTAI::AsmTransCBE = x86_asm_table;
+ BaseTAI::AssemblerDialect = Subtarget->getAsmFlavor();
+ }
+
+ virtual bool ExpandInlineAsm(CallInst *CI) const;
+
+ private:
+ bool LowerToBSwap(CallInst *CI) const;
+ };
+
+ typedef X86TargetAsmInfo<TargetAsmInfo> X86GenericTargetAsmInfo;
+
+ EXTERN_TEMPLATE_INSTANTIATION(class X86TargetAsmInfo<TargetAsmInfo>);
+
+ struct X86DarwinTargetAsmInfo : public X86TargetAsmInfo<DarwinTargetAsmInfo> {
+ explicit X86DarwinTargetAsmInfo(const X86TargetMachine &TM);
+ virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const;
+ virtual const char *getEHGlobalPrefix() const;
+ };
+
+ struct X86ELFTargetAsmInfo : public X86TargetAsmInfo<ELFTargetAsmInfo> {
+ explicit X86ELFTargetAsmInfo(const X86TargetMachine &TM);
+ virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const;
+ };
+
+ struct X86COFFTargetAsmInfo : public X86GenericTargetAsmInfo {
+ explicit X86COFFTargetAsmInfo(const X86TargetMachine &TM);
+ virtual unsigned PreferredEHDataFormat(DwarfEncoding::Target Reason,
+ bool Global) const;
+ virtual std::string UniqueSectionForGlobal(const GlobalValue* GV,
+ SectionKind::Kind kind) const;
+ virtual std::string printSectionFlags(unsigned flags) const;
+ };
+
+ struct X86WinTargetAsmInfo : public X86GenericTargetAsmInfo {
+ explicit X86WinTargetAsmInfo(const X86TargetMachine &TM);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
new file mode 100644
index 000000000000..8264462506aa
--- /dev/null
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -0,0 +1,317 @@
+//===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86TargetAsmInfo.h"
+#include "X86TargetMachine.h"
+#include "X86.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+/// X86TargetMachineModule - Note that this is used on hosts that cannot link
+/// in a library unless there are references into the library. In particular,
+/// it seems that it is not possible to get things to work on Win32 without
+/// this. Though it is unused, do not remove it.
+extern "C" int X86TargetMachineModule;
+int X86TargetMachineModule = 0;
+
+// Register the target.
+static RegisterTarget<X86_32TargetMachine>
+X("x86", "32-bit X86: Pentium-Pro and above");
+static RegisterTarget<X86_64TargetMachine>
+Y("x86-64", "64-bit X86: EM64T and AMD64");
+
+// No assembler printer by default
+X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0;
+
+const TargetAsmInfo *X86TargetMachine::createTargetAsmInfo() const {
+ if (Subtarget.isFlavorIntel())
+ return new X86WinTargetAsmInfo(*this);
+ else
+ switch (Subtarget.TargetType) {
+ case X86Subtarget::isDarwin:
+ return new X86DarwinTargetAsmInfo(*this);
+ case X86Subtarget::isELF:
+ return new X86ELFTargetAsmInfo(*this);
+ case X86Subtarget::isMingw:
+ case X86Subtarget::isCygwin:
+ return new X86COFFTargetAsmInfo(*this);
+ case X86Subtarget::isWindows:
+ return new X86WinTargetAsmInfo(*this);
+ default:
+ return new X86GenericTargetAsmInfo(*this);
+ }
+}
+
+unsigned X86_32TargetMachine::getJITMatchQuality() {
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned X86_64TargetMachine::getJITMatchQuality() {
+#if defined(__x86_64__) || defined(_M_AMD64)
+ return 10;
+#endif
+ return 0;
+}
+
+unsigned X86_32TargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "i[3-9]86-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
+ TT[4] == '-' && TT[1] - '3' < 6)
+ return 20;
+ // If the target triple is something non-X86, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer32)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
+
+unsigned X86_64TargetMachine::getModuleMatchQuality(const Module &M) {
+ // We strongly match "x86_64-*".
+ std::string TT = M.getTargetTriple();
+ if (TT.size() >= 7 && TT[0] == 'x' && TT[1] == '8' && TT[2] == '6' &&
+ TT[3] == '_' && TT[4] == '6' && TT[5] == '4' && TT[6] == '-')
+ return 20;
+
+ // We strongly match "amd64-*".
+ if (TT.size() >= 6 && TT[0] == 'a' && TT[1] == 'm' && TT[2] == 'd' &&
+ TT[3] == '6' && TT[4] == '4' && TT[5] == '-')
+ return 20;
+
+ // If the target triple is something non-X86-64, we don't match.
+ if (!TT.empty()) return 0;
+
+ if (M.getEndianness() == Module::LittleEndian &&
+ M.getPointerSize() == Module::Pointer64)
+ return 10; // Weak match
+ else if (M.getEndianness() != Module::AnyEndianness ||
+ M.getPointerSize() != Module::AnyPointerSize)
+ return 0; // Match for some other target
+
+ return getJITMatchQuality()/2;
+}
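
Both matchers above look only at the leading characters of the module's target triple. The standalone sketch below restates the documented intent of the 32-bit test ("i[3-9]86-*"); it is illustrative only, and the character arithmetic in the matcher itself differs slightly at the edges of that range.

#include <cassert>
#include <string>

// Hypothetical helper, not LLVM API: does the triple start with i[3-9]86- ?
static bool looksLikeX86_32Triple(const std::string &TT) {
  return TT.size() >= 5 && TT[0] == 'i' &&
         TT[1] >= '3' && TT[1] <= '9' &&
         TT[2] == '8' && TT[3] == '6' && TT[4] == '-';
}

int main() {
  assert(looksLikeX86_32Triple("i686-pc-linux-gnu"));          // strong match, 20
  assert(!looksLikeX86_32Triple("x86_64-unknown-linux-gnu"));  // X86_64 handles this
  assert(!looksLikeX86_32Triple("powerpc-apple-darwin9"));     // no match
}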
+
+X86_32TargetMachine::X86_32TargetMachine(const Module &M, const std::string &FS)
+ : X86TargetMachine(M, FS, false) {
+}
+
+
+X86_64TargetMachine::X86_64TargetMachine(const Module &M, const std::string &FS)
+ : X86TargetMachine(M, FS, true) {
+}
+
+/// X86TargetMachine ctor - Create an X86 architecture model. The is64Bit
+/// flag selects between the 32-bit and 64-bit variants.
+X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
+ bool is64Bit)
+ : Subtarget(M, FS, is64Bit),
+ DataLayout(Subtarget.getDataLayout()),
+ FrameInfo(TargetFrameInfo::StackGrowsDown,
+ Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
+ InstrInfo(*this), JITInfo(*this), TLInfo(*this) {
+ DefRelocModel = getRelocationModel();
+ // FIXME: Correctly select PIC model for Win64 stuff
+ if (getRelocationModel() == Reloc::Default) {
+ if (Subtarget.isTargetDarwin() ||
+ (Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64()))
+ setRelocationModel(Reloc::DynamicNoPIC);
+ else
+ setRelocationModel(Reloc::Static);
+ }
+
+ // ELF doesn't have a distinct dynamic-no-PIC model. Dynamic-no-PIC
+ // is defined as a model for code which may be used in static or
+ // dynamic executables but not necessarily a shared library. On ELF, we
+ // implement this by using the Static model.
+ if (Subtarget.isTargetELF() &&
+ getRelocationModel() == Reloc::DynamicNoPIC)
+ setRelocationModel(Reloc::Static);
+
+ if (Subtarget.is64Bit()) {
+ // No DynamicNoPIC support under X86-64.
+ if (getRelocationModel() == Reloc::DynamicNoPIC)
+ setRelocationModel(Reloc::PIC_);
+ // Default X86-64 code model is small.
+ if (getCodeModel() == CodeModel::Default)
+ setCodeModel(CodeModel::Small);
+ }
+
+ if (Subtarget.isTargetCygMing())
+ Subtarget.setPICStyle(PICStyles::WinPIC);
+ else if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.is64Bit())
+ Subtarget.setPICStyle(PICStyles::RIPRel);
+ else
+ Subtarget.setPICStyle(PICStyles::Stub);
+ } else if (Subtarget.isTargetELF()) {
+ if (Subtarget.is64Bit())
+ Subtarget.setPICStyle(PICStyles::RIPRel);
+ else
+ Subtarget.setPICStyle(PICStyles::GOT);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Install an instruction selector.
+ PM.add(createX86ISelDag(*this, OptLevel));
+
+ // If we're using Fast-ISel, clean up the mess.
+ if (EnableFastISel)
+ PM.add(createDeadMachineInstructionElimPass());
+
+ // Install a pass to insert x87 FP_REG_KILL instructions, as needed.
+ PM.add(createX87FPRegKillInserterPass());
+
+ return false;
+}
+
+bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ // Calculate and set max stack object alignment early, so we can decide
+ // whether we will need stack realignment (and thus FP).
+ PM.add(createX86MaxStackAlignmentCalculatorPass());
+ return false; // -print-machineinstr shouldn't print after this.
+}
+
+bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createX86FloatingPointStackifierPass());
+ return true; // -print-machineinstr should print after this.
+}
+
+bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+ return false;
+}
+
+bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE) {
+ // FIXME: Move this to TargetJITInfo!
+ // On Darwin, do not override 64-bit setting made in X86TargetMachine().
+ if (DefRelocModel == Reloc::Default &&
+ (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit()))
+ setRelocationModel(Reloc::Static);
+
+ // 64-bit JIT places everything in the same buffer except external functions.
+ // On Darwin, use small code model but hack the call instruction for
+ // externals. Elsewhere, do not assume globals are in the lower 4G.
+ if (Subtarget.is64Bit()) {
+ if (Subtarget.isTargetDarwin())
+ setCodeModel(CodeModel::Small);
+ else
+ setCodeModel(CodeModel::Large);
+ }
+
+ PM.add(createX86CodeEmitterPass(*this, MCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE) {
+ // FIXME: Move this to TargetJITInfo!
+ // On Darwin, do not override 64-bit setting made in X86TargetMachine().
+ if (DefRelocModel == Reloc::Default &&
+ (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit()))
+ setRelocationModel(Reloc::Static);
+
+ // 64-bit JIT places everything in the same buffer except external functions.
+ // On Darwin, use small code model but hack the call instruction for
+ // externals. Elsewhere, do not assume globals are in the lower 4G.
+ if (Subtarget.is64Bit()) {
+ if (Subtarget.isTargetDarwin())
+ setCodeModel(CodeModel::Small);
+ else
+ setCodeModel(CodeModel::Large);
+ }
+
+ PM.add(createX86JITCodeEmitterPass(*this, JCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ MachineCodeEmitter &MCE) {
+ PM.add(createX86CodeEmitterPass(*this, MCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm,
+ JITCodeEmitter &JCE) {
+ PM.add(createX86JITCodeEmitterPass(*this, JCE));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
+
+ return false;
+}
+
+/// symbolicAddressesAreRIPRel - Return true if symbolic addresses are
+/// RIP-relative on this machine, taking into consideration the relocation
+/// model and subtarget. RIP-relative addresses cannot have a separate
+/// base or index register.
+bool X86TargetMachine::symbolicAddressesAreRIPRel() const {
+ return getRelocationModel() != Reloc::Static &&
+ Subtarget.isPICStyleRIPRel();
+}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
new file mode 100644
index 000000000000..ecc1d39701de
--- /dev/null
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -0,0 +1,124 @@
+//===-- X86TargetMachine.h - Define TargetMachine for the X86 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the X86 specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86TARGETMACHINE_H
+#define X86TARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "X86.h"
+#include "X86ELFWriterInfo.h"
+#include "X86InstrInfo.h"
+#include "X86JITInfo.h"
+#include "X86Subtarget.h"
+#include "X86ISelLowering.h"
+
+namespace llvm {
+
+class raw_ostream;
+
+class X86TargetMachine : public LLVMTargetMachine {
+ X86Subtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ TargetFrameInfo FrameInfo;
+ X86InstrInfo InstrInfo;
+ X86JITInfo JITInfo;
+ X86TargetLowering TLInfo;
+ X86ELFWriterInfo ELFWriterInfo;
+ Reloc::Model DefRelocModel; // Reloc model before it's overridden.
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+ // To avoid having the target depend on the asmprinter libraries, the asmprinter
+ // sets this constructor function pointer at startup time if it is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ X86TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
+public:
+ X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit);
+
+ virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual X86JITInfo *getJITInfo() { return &JITInfo; }
+ virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; }
+ virtual X86TargetLowering *getTargetLowering() const {
+ return const_cast<X86TargetLowering*>(&TLInfo);
+ }
+ virtual const X86RegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const X86ELFWriterInfo *getELFWriterInfo() const {
+ return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+ }
+
+ static unsigned getModuleMatchQuality(const Module &M);
+ static unsigned getJITMatchQuality();
+
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
+
+ // Set up the pass pipeline.
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE);
+ virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, MachineCodeEmitter &MCE);
+ virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DumpAsm, JITCodeEmitter &JCE);
+
+ /// symbolicAddressesAreRIPRel - Return true if symbolic addresses are
+ /// RIP-relative on this machine, taking into consideration the relocation
+ /// model and subtarget. RIP-relative addresses cannot have a separate
+ /// base or index register.
+ bool symbolicAddressesAreRIPRel() const;
+};
+
+/// X86_32TargetMachine - X86 32-bit target machine.
+///
+class X86_32TargetMachine : public X86TargetMachine {
+public:
+ X86_32TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+/// X86_64TargetMachine - X86 64-bit target machine.
+///
+class X86_64TargetMachine : public X86TargetMachine {
+public:
+ X86_64TargetMachine(const Module &M, const std::string &FS);
+
+ static unsigned getJITMatchQuality();
+ static unsigned getModuleMatchQuality(const Module &M);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
new file mode 100644
index 000000000000..a7aba14a7a14
--- /dev/null
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -0,0 +1,23 @@
+set(LLVM_TARGET_DEFINITIONS XCore.td)
+
+tablegen(XCoreGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(XCoreGenRegisterNames.inc -gen-register-enums)
+tablegen(XCoreGenRegisterInfo.inc -gen-register-desc)
+tablegen(XCoreGenInstrNames.inc -gen-instr-enums)
+tablegen(XCoreGenInstrInfo.inc -gen-instr-desc)
+tablegen(XCoreGenAsmWriter.inc -gen-asm-writer)
+tablegen(XCoreGenDAGISel.inc -gen-dag-isel)
+tablegen(XCoreGenCallingConv.inc -gen-callingconv)
+tablegen(XCoreGenSubtarget.inc -gen-subtarget)
+
+add_llvm_target(XCore
+ XCoreAsmPrinter.cpp
+ XCoreFrameInfo.cpp
+ XCoreInstrInfo.cpp
+ XCoreISelDAGToDAG.cpp
+ XCoreISelLowering.cpp
+ XCoreRegisterInfo.cpp
+ XCoreSubtarget.cpp
+ XCoreTargetAsmInfo.cpp
+ XCoreTargetMachine.cpp
+ )
diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile
new file mode 100644
index 000000000000..568df70ab63a
--- /dev/null
+++ b/lib/Target/XCore/Makefile
@@ -0,0 +1,21 @@
+##===- lib/Target/XCore/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMXCore
+TARGET = XCore
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \
+ XCoreGenRegisterInfo.inc XCoreGenInstrNames.inc \
+ XCoreGenInstrInfo.inc XCoreGenAsmWriter.inc \
+ XCoreGenDAGISel.inc XCoreGenCallingConv.inc \
+ XCoreGenSubtarget.inc
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/XCore/README.txt b/lib/Target/XCore/README.txt
new file mode 100644
index 000000000000..deaeb0f2a93b
--- /dev/null
+++ b/lib/Target/XCore/README.txt
@@ -0,0 +1,8 @@
+To-do
+-----
+
+* Instruction encodings
+* Tailcalls
+* Investigate loop alignment
+* Add builtins
+* Make better use of lmul / macc
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
new file mode 100644
index 000000000000..5722b873e1aa
--- /dev/null
+++ b/lib/Target/XCore/XCore.h
@@ -0,0 +1,42 @@
+//===-- XCore.h - Top-level interface for XCore representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// XCore back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_XCORE_H
+#define TARGET_XCORE_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class FunctionPass;
+ class TargetMachine;
+ class XCoreTargetMachine;
+ class raw_ostream;
+
+ FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
+ FunctionPass *createXCoreCodePrinterPass(raw_ostream &OS,
+ XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose);
+} // end namespace llvm
+
+// Defines symbolic names for XCore registers. This defines a mapping from
+// register name to register number.
+//
+#include "XCoreGenRegisterNames.inc"
+
+// Defines symbolic names for the XCore instructions.
+//
+#include "XCoreGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td
new file mode 100644
index 000000000000..7a2dcdbf9fe5
--- /dev/null
+++ b/lib/Target/XCore/XCore.td
@@ -0,0 +1,62 @@
+//===- XCore.td - Describe the XCore Target Machine --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Descriptions
+//===----------------------------------------------------------------------===//
+
+include "XCoreRegisterInfo.td"
+include "XCoreInstrInfo.td"
+include "XCoreCallingConv.td"
+
+def XCoreInstrInfo : InstrInfo {
+ let TSFlagsFields = [];
+ let TSFlagsShifts = [];
+}
+
+//===----------------------------------------------------------------------===//
+// XCore Subtarget features.
+//===----------------------------------------------------------------------===//
+
+def FeatureXS1A
+ : SubtargetFeature<"xs1a", "IsXS1A", "true",
+ "Enable XS1A instructions">;
+
+def FeatureXS1B
+ : SubtargetFeature<"xs1b", "IsXS1B", "true",
+ "Enable XS1B instructions">;
+
+//===----------------------------------------------------------------------===//
+// XCore processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", [FeatureXS1A]>;
+def : Proc<"xs1a-generic", [FeatureXS1A]>;
+def : Proc<"xs1b-generic", [FeatureXS1B]>;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def XCore : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = XCoreInstrInfo;
+}
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
new file mode 100644
index 000000000000..c9a6d8afd316
--- /dev/null
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -0,0 +1,472 @@
+//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the XAS-format XCore assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "XCore.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreSubtarget.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cctype>
+using namespace llvm;
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+static cl::opt<std::string> FileDirective("xcore-file-directive", cl::Optional,
+ cl::desc("Output a file directive into the assembly file"),
+ cl::Hidden,
+ cl::value_desc("filename"),
+ cl::init(""));
+
+static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
+ cl::desc("Maximum number of threads (for emulation thread-local storage)"),
+ cl::Hidden,
+ cl::value_desc("number"),
+ cl::init(8));
+
+namespace {
+ class VISIBILITY_HIDDEN XCoreAsmPrinter : public AsmPrinter {
+ DwarfWriter *DW;
+ const XCoreSubtarget &Subtarget;
+ public:
+ explicit XCoreAsmPrinter(raw_ostream &O, XCoreTargetMachine &TM,
+ const TargetAsmInfo *T, CodeGenOpt::Level OL,
+ bool V)
+ : AsmPrinter(O, TM, T, OL, V), DW(0),
+ Subtarget(*TM.getSubtargetImpl()) {}
+
+ virtual const char *getPassName() const {
+ return "XCore Assembly Printer";
+ }
+
+ void printMemOperand(const MachineInstr *MI, int opNum);
+ void printOperand(const MachineInstr *MI, int opNum);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+
+ void emitFileDirective(const std::string &filename);
+ void emitGlobalDirective(const std::string &name);
+ void emitExternDirective(const std::string &name);
+
+ void emitArrayBound(const std::string &name, const GlobalVariable *GV);
+ void emitGlobal(const GlobalVariable *GV);
+
+ void emitFunctionStart(MachineFunction &MF);
+ void emitFunctionEnd(MachineFunction &MF);
+
+ bool printInstruction(const MachineInstr *MI); // autogenerated.
+ void printMachineInstruction(const MachineInstr *MI);
+ bool runOnMachineFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AsmPrinter::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<DwarfWriter>();
+ }
+ };
+} // end of anonymous namespace
+
+#include "XCoreGenAsmWriter.inc"
+
+/// createXCoreCodePrinterPass - Returns a pass that prints the XCore
+/// assembly code for a MachineFunction to the given output stream,
+/// using the given target machine description. This should work
+/// regardless of whether the function is in SSA form.
+///
+FunctionPass *llvm::createXCoreCodePrinterPass(raw_ostream &o,
+ XCoreTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose) {
+ return new XCoreAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+}
+
+// PrintEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+static void PrintEscapedString(const std::string &Str, raw_ostream &Out) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '"' && C != '\\') {
+ Out << C;
+ } else {
+ Out << '\\'
+ << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
+ << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+ }
+ }
+}
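
The escaping above is the assembler-friendly form used for the .file directive: printable characters pass through, and everything else becomes a backslash followed by two uppercase hex digits with no 'x'. A standalone sketch of the per-byte rule (the escapeByte helper is hypothetical, for illustration only):

#include <cassert>
#include <cctype>
#include <string>

// Same escaping rule as PrintEscapedString, one byte at a time.
static std::string escapeByte(unsigned char C) {
  if (isprint(C) && C != '"' && C != '\\')
    return std::string(1, (char)C);
  const char *Hex = "0123456789ABCDEF";
  return std::string("\\") + Hex[C >> 4] + Hex[C & 15];
}

int main() {
  assert(escapeByte('a') == "a");
  assert(escapeByte('\n') == "\\0A"); // newline -> backslash, '0', 'A'
  assert(escapeByte('"') == "\\22");  // quote   -> backslash, '2', '2'
}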
+
+void XCoreAsmPrinter::
+emitFileDirective(const std::string &name)
+{
+ O << "\t.file\t\"";
+ PrintEscapedString(name, O);
+ O << "\"\n";
+}
+
+void XCoreAsmPrinter::
+emitGlobalDirective(const std::string &name)
+{
+ O << TAI->getGlobalDirective() << name;
+ O << "\n";
+}
+
+void XCoreAsmPrinter::
+emitExternDirective(const std::string &name)
+{
+ O << "\t.extern\t" << name;
+ O << '\n';
+}
+
+void XCoreAsmPrinter::
+emitArrayBound(const std::string &name, const GlobalVariable *GV)
+{
+ assert(((GV->hasExternalLinkage() ||
+ GV->hasWeakLinkage()) ||
+ GV->hasLinkOnceLinkage()) && "Unexpected linkage");
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(
+ cast<PointerType>(GV->getType())->getElementType()))
+ {
+ O << TAI->getGlobalDirective() << name << ".globound" << "\n";
+ O << TAI->getSetDirective() << name << ".globound" << ","
+ << ATy->getNumElements() << "\n";
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ O << TAI->getWeakDefDirective() << name << ".globound" << "\n";
+ }
+ }
+}
+
+void XCoreAsmPrinter::
+emitGlobal(const GlobalVariable *GV)
+{
+ const TargetData *TD = TM.getTargetData();
+
+ if (GV->hasInitializer()) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GV))
+ return;
+
+ SwitchToSection(TAI->SectionForGlobal(GV));
+
+ std::string name = Mang->getValueName(GV);
+ Constant *C = GV->getInitializer();
+ unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
+
+ // Mark the start of the global
+ O << "\t.cc_top " << name << ".data," << name << "\n";
+
+ switch (GV->getLinkage()) {
+ case GlobalValue::AppendingLinkage:
+ cerr << "AppendingLinkage is not supported by this target!\n";
+ abort();
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ case GlobalValue::ExternalLinkage:
+ emitArrayBound(name, GV);
+ emitGlobalDirective(name);
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
+ O << TAI->getWeakDefDirective() << name << "\n";
+ }
+ // FALL THROUGH
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
+ break;
+ case GlobalValue::GhostLinkage:
+ cerr << "Should not have any unmaterialized functions!\n";
+ abort();
+ case GlobalValue::DLLImportLinkage:
+ cerr << "DLLImport linkage is not supported by this target!\n";
+ abort();
+ case GlobalValue::DLLExportLinkage:
+ cerr << "DLLExport linkage is not supported by this target!\n";
+ abort();
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ EmitAlignment(Align, GV, 2);
+
+ unsigned Size = TD->getTypeAllocSize(C->getType());
+ if (GV->isThreadLocal()) {
+ Size *= MaxThreads;
+ }
+ if (TAI->hasDotTypeDotSizeDirective()) {
+ O << "\t.type " << name << ",@object\n";
+ O << "\t.size " << name << "," << Size << "\n";
+ }
+ O << name << ":\n";
+
+ EmitGlobalConstant(C);
+ if (GV->isThreadLocal()) {
+ for (unsigned i = 1; i < MaxThreads; ++i) {
+ EmitGlobalConstant(C);
+ }
+ }
+ if (Size < 4) {
+ // The ABI requires that unsigned scalar types smaller than 32 bits
+ // are padded to 32 bits.
+ EmitZeros(4 - Size);
+ }
+
+ // Mark the end of the global
+ O << "\t.cc_bottom " << name << ".data\n";
+ } else {
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ }
+}
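
Two size rules above are easy to miss: a thread-local global is emitted once per potential thread (MaxThreads, default 8), and anything smaller than a word is padded up to 4 bytes. A hedged standalone sketch of the total number of bytes emitted (the emittedSize helper is illustrative, not LLVM API):

#include <cassert>

// Total bytes emitted for a global whose initializer is TypeSize bytes.
static unsigned emittedSize(unsigned TypeSize, bool IsThreadLocal,
                            unsigned NumThreads = 8 /* MaxThreads default */) {
  unsigned Size = IsThreadLocal ? TypeSize * NumThreads : TypeSize;
  return Size < 4 ? 4 : Size; // sub-word globals are padded to 32 bits
}

int main() {
  assert(emittedSize(4, false) == 4);  // plain i32
  assert(emittedSize(1, false) == 4);  // i8, padded to a word
  assert(emittedSize(4, true) == 32);  // thread-local i32, one copy per thread
}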
+
+/// Emit the directives on the start of functions
+void XCoreAsmPrinter::
+emitFunctionStart(MachineFunction &MF)
+{
+ // Print out the label for the function.
+ const Function *F = MF.getFunction();
+
+ SwitchToSection(TAI->SectionForGlobal(F));
+
+ // Mark the start of the function
+ O << "\t.cc_top " << CurrentFnName << ".function," << CurrentFnName << "\n";
+
+ switch (F->getLinkage()) {
+ default: assert(0 && "Unknown linkage type!");
+ case Function::InternalLinkage: // Symbols default to internal.
+ case Function::PrivateLinkage:
+ break;
+ case Function::ExternalLinkage:
+ emitGlobalDirective(CurrentFnName);
+ break;
+ case Function::LinkOnceAnyLinkage:
+ case Function::LinkOnceODRLinkage:
+ case Function::WeakAnyLinkage:
+ case Function::WeakODRLinkage:
+ // TODO Use COMDAT groups for LinkOnceLinkage
+ O << TAI->getGlobalDirective() << CurrentFnName << "\n";
+ O << TAI->getWeakDefDirective() << CurrentFnName << "\n";
+ break;
+ }
+ // Align to 2 bytes (1 << 1).
+ EmitAlignment(1, F, 1);
+ if (TAI->hasDotTypeDotSizeDirective()) {
+ O << "\t.type " << CurrentFnName << ",@function\n";
+ }
+ O << CurrentFnName << ":\n";
+}
+
+/// Emit the directives on the end of functions
+void XCoreAsmPrinter::
+emitFunctionEnd(MachineFunction &MF)
+{
+ // Mark the end of the function
+ O << "\t.cc_bottom " << CurrentFnName << ".function\n";
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool XCoreAsmPrinter::runOnMachineFunction(MachineFunction &MF)
+{
+ this->MF = &MF;
+
+ SetupMachineFunction(MF);
+
+ // Print out constants referenced by the function
+ EmitConstantPool(MF.getConstantPool());
+
+ // Print out jump tables referenced by the function
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
+ // Emit the function start directives
+ emitFunctionStart(MF);
+
+ // Emit pre-function debug information.
+ DW->BeginFunction(&MF);
+
+ // Print out code for the function.
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+
+ // Print a label for the basic block.
+ if (I != MF.begin()) {
+ printBasicBlockLabel(I, true, true);
+ O << '\n';
+ }
+
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ // Print the assembly for the instruction.
+ O << "\t";
+ printMachineInstruction(II);
+ }
+
+ // Each Basic Block is separated by a newline
+ O << '\n';
+ }
+
+ // Emit function end directives
+ emitFunctionEnd(MF);
+
+ // Emit post-function debug information.
+ DW->EndFunction(&MF);
+
+ // We didn't modify anything.
+ return false;
+}
+
+void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum)
+{
+ printOperand(MI, opNum);
+
+ if (MI->getOperand(opNum+1).isImm()
+ && MI->getOperand(opNum+1).getImm() == 0)
+ return;
+
+ O << "+";
+ printOperand(MI, opNum+1);
+}
+
+void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+ else
+ assert(0 && "not implemented");
+ break;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ {
+ const GlobalValue *GV = MO.getGlobal();
+ O << Mang->getValueName(GV);
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+ }
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << MO.getSymbolName();
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+ default:
+ assert(0 && "not implemented");
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode) {
+ printOperand(MI, OpNo);
+ return false;
+}
+
+void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ // Check for mov mnemonic
+ unsigned src, dst, srcSR, dstSR;
+ if (TM.getInstrInfo()->isMoveInstr(*MI, src, dst, srcSR, dstSR)) {
+ O << "\tmov ";
+ O << TM.getRegisterInfo()->get(dst).AsmName;
+ O << ", ";
+ O << TM.getRegisterInfo()->get(src).AsmName;
+ O << "\n";
+ return;
+ }
+ if (printInstruction(MI)) {
+ return;
+ }
+ assert(0 && "Unhandled instruction in asm writer!");
+}
+
+bool XCoreAsmPrinter::doInitialization(Module &M) {
+ bool Result = AsmPrinter::doInitialization(M);
+
+ if (!FileDirective.empty()) {
+ emitFileDirective(FileDirective);
+ }
+
+ // Print out type strings for external functions here
+ for (Module::const_iterator I = M.begin(), E = M.end();
+ I != E; ++I) {
+ if (I->isDeclaration() && !I->isIntrinsic()) {
+ switch (I->getLinkage()) {
+ default:
+ assert(0 && "Unexpected linkage");
+ case Function::ExternalWeakLinkage:
+ ExtWeakSymbols.insert(I);
+ // fallthrough
+ case Function::ExternalLinkage:
+ break;
+ }
+ }
+ }
+
+ // Emit initial debug information.
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ assert(DW && "Dwarf Writer is not available");
+ DW->BeginModule(&M, getAnalysisIfAvailable<MachineModuleInfo>(),
+ O, this, TAI);
+ return Result;
+}
+
+bool XCoreAsmPrinter::doFinalization(Module &M) {
+
+ // Print out module-level global variables.
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I) {
+ emitGlobal(I);
+ }
+
+ // Emit final debug information.
+ DW->EndModule();
+
+ return AsmPrinter::doFinalization(M);
+}
diff --git a/lib/Target/XCore/XCoreCallingConv.td b/lib/Target/XCore/XCoreCallingConv.td
new file mode 100644
index 000000000000..8107e329bd58
--- /dev/null
+++ b/lib/Target/XCore/XCoreCallingConv.td
@@ -0,0 +1,33 @@
+//===- XCoreCallingConv.td - Calling Conventions for XCore -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for XCore architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XCore Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_XCore : CallingConv<[
+ // i32 are returned in registers R0, R1, R2, R3
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// XCore Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_XCore : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The first 4 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+ // Integer values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32], CCAssignToStack<4, 4>>
+]>;
diff --git a/lib/Target/XCore/XCoreFrameInfo.cpp b/lib/Target/XCore/XCoreFrameInfo.cpp
new file mode 100644
index 000000000000..f50dc96c6ba9
--- /dev/null
+++ b/lib/Target/XCore/XCoreFrameInfo.cpp
@@ -0,0 +1,27 @@
+//===-- XCoreFrameInfo.cpp - Frame info for XCore Target ---------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreFrameInfo.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// XCoreFrameInfo:
+//===----------------------------------------------------------------------===//
+
+XCoreFrameInfo::XCoreFrameInfo(const TargetMachine &tm):
+ TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0)
+{
+ // Do nothing
+}
diff --git a/lib/Target/XCore/XCoreFrameInfo.h b/lib/Target/XCore/XCoreFrameInfo.h
new file mode 100644
index 000000000000..2c67577181ec
--- /dev/null
+++ b/lib/Target/XCore/XCoreFrameInfo.h
@@ -0,0 +1,34 @@
+//===-- XCoreFrameInfo.h - Frame info for XCore Target -----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains XCore frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREFRAMEINFO_H
+#define XCOREFRAMEINFO_H
+
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class XCoreFrameInfo: public TargetFrameInfo {
+
+ public:
+ XCoreFrameInfo(const TargetMachine &tm);
+
+ //! Stack slot size (4 bytes)
+ static int stackSlotSize() {
+ return 4;
+ }
+ };
+}
+
+#endif // XCOREFRAMEINFO_H
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
new file mode 100644
index 000000000000..eed34a4b635b
--- /dev/null
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -0,0 +1,230 @@
+//===-- XCoreISelDAGToDAG.cpp - A dag to dag inst selector for XCore ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the XCore target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "XCoreISelLowering.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <queue>
+#include <set>
+using namespace llvm;
+
+/// XCoreDAGToDAGISel - XCore specific code to select XCore machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+ class XCoreDAGToDAGISel : public SelectionDAGISel {
+ XCoreTargetLowering &Lowering;
+ const XCoreSubtarget &Subtarget;
+
+ public:
+ XCoreDAGToDAGISel(XCoreTargetMachine &TM)
+ : SelectionDAGISel(TM),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ SDNode *Select(SDValue Op);
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ // Complex Pattern Selectors.
+ bool SelectADDRspii(SDValue Op, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+ bool SelectADDRdpii(SDValue Op, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+ bool SelectADDRcpii(SDValue Op, SDValue Addr, SDValue &Base,
+ SDValue &Offset);
+
+ virtual void InstructionSelect();
+
+ virtual const char *getPassName() const {
+ return "XCore DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Include the pieces autogenerated from the target description.
+ #include "XCoreGenDAGISel.inc"
+ };
+} // end anonymous namespace
+
+/// createXCoreISelDag - This pass converts a legalized DAG into an
+/// XCore-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
+ return new XCoreDAGToDAGISel(TM);
+}
+
+bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ FrameIndexSDNode *FIN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
+ // Constant positive word offset from frame index
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper)
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0)) {
+ // Constant word offset from an object in the data region
+ Base = Addr.getOperand(0).getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ ConstantSDNode *CN = 0;
+ if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper)
+ && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && (CN->getSExtValue() % 4 == 0)) {
+ // Constant word offset from an object in the constant pool
+ Base = Addr.getOperand(0).getOperand(0);
+ Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ return false;
+}
+
+/// InstructionSelect - This callback is invoked by
+/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+void XCoreDAGToDAGISel::
+InstructionSelect() {
+ DEBUG(BB->dump());
+
+ // Select target instructions for the DAG.
+ SelectRoot(*CurDAG);
+
+ CurDAG->RemoveDeadNodes();
+}
+
+SDNode *XCoreDAGToDAGISel::Select(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+ MVT NVT = N->getValueType(0);
+ if (NVT == MVT::i32) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ if (Predicate_immMskBitp(N)) {
+ SDValue MskSize = Transform_msksize_xform(N);
+ return CurDAG->getTargetNode(XCore::MKMSK_rus, dl, MVT::i32, MskSize);
+ }
+ else if (! Predicate_immU16(N)) {
+ unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val),
+ TLI.getPointerTy());
+ return CurDAG->getTargetNode(XCore::LDWCP_lru6, dl, MVT::i32,
+ MVT::Other, CPIdx,
+ CurDAG->getEntryNode());
+ }
+ break;
+ }
+ case ISD::SMUL_LOHI: {
+ // FIXME fold addition into the macc instruction
+ if (!Subtarget.isXS1A()) {
+ SDValue Zero(CurDAG->getTargetNode(XCore::LDC_ru6, dl, MVT::i32,
+ CurDAG->getTargetConstant(0, MVT::i32)), 0);
+ SDValue Ops[] = { Zero, Zero, Op.getOperand(0), Op.getOperand(1) };
+ SDNode *ResNode = CurDAG->getTargetNode(XCore::MACCS_l4r, dl,
+ MVT::i32, MVT::i32, Ops, 4);
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1));
+ ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0));
+ return NULL;
+ }
+ break;
+ }
+ case ISD::UMUL_LOHI: {
+ // FIXME fold addition into the macc / lmul instruction
+ SDValue Zero(CurDAG->getTargetNode(XCore::LDC_ru6, dl, MVT::i32,
+ CurDAG->getTargetConstant(0, MVT::i32)), 0);
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ Zero, Zero };
+ SDNode *ResNode = CurDAG->getTargetNode(XCore::LMUL_l6r, dl, MVT::i32,
+ MVT::i32, Ops, 4);
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1));
+ ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0));
+ return NULL;
+ }
+ case XCoreISD::LADD: {
+ if (!Subtarget.isXS1A()) {
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ Op.getOperand(2) };
+ return CurDAG->getTargetNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ break;
+ }
+ case XCoreISD::LSUB: {
+ if (!Subtarget.isXS1A()) {
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1),
+ Op.getOperand(2) };
+ return CurDAG->getTargetNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ break;
+ }
+ // Other cases are autogenerated.
+ }
+ }
+ return SelectCode(Op);
+}
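Predicate_immMskBitp and Transform_msksize_xform above are generated from the .td predicates; a minimal sketch of the check they plausibly perform (my reading, not taken from the patch) is:

  #include <cassert>
  #include <cstdint>

  // A constant can be built with MKMSK if it is a contiguous mask of low
  // bits, i.e. of the form 2^n - 1; the transform then yields n.
  static bool isLowBitMask(uint32_t V) {
    return V != 0 && ((V + 1) & V) == 0;     // 0x1, 0x3, 0x7, ..., 0xffffffff
  }

  static unsigned maskSize(uint32_t V) {
    assert(isLowBitMask(V) && "not a mask");
    unsigned N = 0;
    for (; V; V >>= 1) ++N;
    return N;                                // operand for MKMSK_rus
  }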
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
new file mode 100644
index 000000000000..93c5f59f642b
--- /dev/null
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -0,0 +1,934 @@
+//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCoreTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xcore-lower"
+
+#include "XCoreISelLowering.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "XCore.h"
+#include "XCoreTargetMachine.h"
+#include "XCoreSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/VectorExtras.h"
+#include <queue>
+#include <set>
+using namespace llvm;
+
+const char *XCoreTargetLowering::
+getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode)
+ {
+ case XCoreISD::BL : return "XCoreISD::BL";
+ case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper";
+ case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper";
+ case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper";
+ case XCoreISD::STWSP : return "XCoreISD::STWSP";
+ case XCoreISD::RETSP : return "XCoreISD::RETSP";
+ default : return NULL;
+ }
+}
+
+XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
+ : TargetLowering(XTM),
+ TM(XTM),
+ Subtarget(*XTM.getSubtargetImpl()) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, XCore::GRRegsRegisterClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Division is expensive
+ setIntDivIsCheap(false);
+
+ setShiftAmountType(MVT::i32);
+ // shl X, 32 == 0
+ setShiftAmountFlavor(Extend);
+ setStackPointerRegisterToSaveRestore(XCore::SP);
+
+ setSchedulingPreference(SchedulingForRegPressure);
+
+ // Use i32 for setcc operation results (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // XCore does not have the NodeTypes below.
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+
+ // Stop the combiner recombining select and set_cc
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ // 64bit
+ if (!Subtarget.isXS1A()) {
+ setOperationAction(ISD::ADD, MVT::i64, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Custom);
+ }
+ if (Subtarget.isXS1A()) {
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ }
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ // Bit Manipulation
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // Expand jump tables for now
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+
+ // RET must be custom lowered, to meet ABI requirements
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+
+ // Thread Local Storage
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+
+ // Conversion of i64 -> double produces constantpool nodes
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
+ // Loads
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+
+ // Varargs
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+
+ // Dynamic stack
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
+ // Debug
+ setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+}
+
+SDValue XCoreTargetLowering::
+LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode())
+ {
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ // FIXME: Remove these when LegalizeDAGTypes lands.
+ case ISD::ADD:
+ case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ default:
+ assert(0 && "unimplemented operand");
+ return SDValue();
+ }
+}
+
+/// ReplaceNodeResults - Replace the results of node with an illegal result
+/// type with new values built out of custom code.
+void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
+ switch (N->getOpcode()) {
+ default:
+ assert(0 && "Don't know how to custom expand this!");
+ return;
+ case ISD::ADD:
+ case ISD::SUB:
+ Results.push_back(ExpandADDSUB(N, DAG));
+ return;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::
+LowerSELECT_CC(SDValue Op, SelectionDAG &DAG)
+{
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2),
+ Op.getOperand(3), Op.getOperand(4));
+ return DAG.getNode(ISD::SELECT, dl, MVT::i32, Cond, Op.getOperand(0),
+ Op.getOperand(1));
+}
+
+SDValue XCoreTargetLowering::
+getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG)
+{
+ // FIXME there is no actual debug info here
+ DebugLoc dl = GA.getDebugLoc();
+ if (isa<Function>(GV)) {
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+ } else if (!Subtarget.isXS1A()) {
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee to determine constness
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+ }
+ bool isConst = GVar && GVar->isConstant();
+ if (isConst) {
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
+ }
+ }
+ return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG)
+{
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ // If it's a debug information descriptor, don't mess with it.
+ if (DAG.isVerifiedDebugInfoDesc(Op))
+ return GA;
+ return getGlobalAddressWrapper(GA, GV, DAG);
+}
+
+static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
+}
+
+static inline bool isZeroLengthArray(const Type *Ty) {
+ const ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty);
+ return AT && (AT->getNumElements() == 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG)
+{
+ // FIXME there isn't really debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ // transform to label + getid() * size
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias then use the aliasee to determine size
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
+ }
+ if (! GVar) {
+ assert(0 && "Thread local object not a GlobalVariable?");
+ return SDValue();
+ }
+ const Type *Ty = cast<PointerType>(GV->getType())->getElementType();
+ if (!Ty->isSized() || isZeroLengthArray(Ty)) {
+ cerr << "Size of thread local object " << GVar->getName()
+ << " is unknown\n";
+ abort();
+ }
+ SDValue base = getGlobalAddressWrapper(GA, GV, DAG);
+ const TargetData *TD = TM.getTargetData();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl),
+ DAG.getConstant(Size, MVT::i32));
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset);
+}
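A minimal sketch of the address computed here, assuming the data section holds one copy of the object per hardware thread; threadID stands in for the xcore_getid intrinsic and the other names are mine:

  #include <cstdint>

  static uintptr_t tlsAddress(uintptr_t objectBase, unsigned threadID,
                              unsigned objectSize) {
    // base of the per-thread array + getid() * size, as built above
    return objectBase + (uintptr_t)threadID * objectSize;
  }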
+
+SDValue XCoreTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG)
+{
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ // FIXME there isn't really debug info here
+ DebugLoc dl = CP->getDebugLoc();
+ if (Subtarget.isXS1A()) {
+ assert(0 && "Lowering of constant pool unimplemented");
+ return SDValue();
+ } else {
+ MVT PtrVT = Op.getValueType();
+ SDValue Res;
+ if (CP->isMachineConstantPoolEntry()) {
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ } else {
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ }
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res);
+ }
+}
+
+SDValue XCoreTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG)
+{
+ // FIXME there isn't really debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ MVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI);
+}
+
+SDValue XCoreTargetLowering::
+ExpandADDSUB(SDNode *N, SelectionDAG &DAG)
+{
+ assert(N->getValueType(0) == MVT::i64 &&
+ (N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+ "Unknown operand to lower!");
+ assert(!Subtarget.isXS1A() && "Cannot custom lower ADD/SUB on xs1a");
+ DebugLoc dl = N->getDebugLoc();
+
+ // Extract components
+ SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(0), DAG.getConstant(0, MVT::i32));
+ SDValue LHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(0), DAG.getConstant(1, MVT::i32));
+ SDValue RHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(1), DAG.getConstant(0, MVT::i32));
+ SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(1), DAG.getConstant(1, MVT::i32));
+
+ // Expand
+ unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD :
+ XCoreISD::LSUB;
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSL, RHSL, Zero);
+ SDValue Lo(Carry.getNode(), 1);
+
+ SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ LHSH, RHSH, Carry);
+ SDValue Hi(Ignored.getNode(), 1);
+ // Merge the pieces
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+}
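A minimal sketch of the semantics this expansion relies on, assuming LADD behaves as a 32-bit add with carry-in that returns the carry as its first result and the sum as its second (mirroring the result order used above):

  #include <cstdint>
  #include <utility>

  // {carry, sum} of a 32-bit add with carry-in.
  static std::pair<uint32_t, uint32_t> ladd(uint32_t A, uint32_t B,
                                            uint32_t CarryIn) {
    uint64_t Wide = (uint64_t)A + B + CarryIn;
    return { (uint32_t)(Wide >> 32), (uint32_t)Wide };
  }

  // Two chained ladds give a 64-bit add from 32-bit halves, as in the DAG
  // built by ExpandADDSUB.
  static uint64_t add64(uint64_t X, uint64_t Y) {
    std::pair<uint32_t, uint32_t> Lo = ladd((uint32_t)X, (uint32_t)Y, 0);
    std::pair<uint32_t, uint32_t> Hi =
        ladd((uint32_t)(X >> 32), (uint32_t)(Y >> 32), Lo.first);
    return ((uint64_t)Hi.second << 32) | Lo.second;
  }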
+
+SDValue XCoreTargetLowering::
+LowerVAARG(SDValue Op, SelectionDAG &DAG)
+{
+ assert(0 && "unimplemented");
+ // FIXME: Arguments passed by reference need an extra dereference.
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ MVT VT = Node->getValueType(0);
+ SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
+ Node->getOperand(1), V, 0);
+ // Increment the pointer, VAList, to the next vararg
+ SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
+ DAG.getConstant(VT.getSizeInBits(),
+ getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0);
+ // Load the actual argument out of the pointer VAList
+ return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0);
+}
+
+SDValue XCoreTargetLowering::
+LowerVASTART(SDValue Op, SelectionDAG &DAG)
+{
+ DebugLoc dl = Op.getDebugLoc();
+ // vastart stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument
+ MachineFunction &MF = DAG.getMachineFunction();
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0);
+}
+
+SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Depths > 0 not supported yet!
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
+ return SDValue();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ RegInfo->getFrameRegister(MF), MVT::i32);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//
+// The lowering operations for the calling convention work in the following order:
+// LowerCALL (virt regs --> phys regs, virt regs --> stack)
+// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs)
+// LowerRET (virt regs --> phys regs)
+// LowerCALL (phys regs --> virt regs)
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// CALL Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// XCore custom CALL implementation
+SDValue XCoreTargetLowering::
+LowerCALL(SDValue Op, SelectionDAG &DAG)
+{
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ unsigned CallingConv = TheCall->getCallingConv();
+ // For now, only CallingConv::C implemented
+ switch (CallingConv)
+ {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::Fast:
+ case CallingConv::C:
+ return LowerCCCCallTo(Op, DAG, CallingConv);
+ }
+}
+
+/// LowerCCCCallTo - function arguments are copied from virtual
+/// regs to physical regs or the stack frame, and CALLSEQ_START and
+/// CALLSEQ_END are emitted.
+/// TODO: isTailCall, sret.
+SDValue XCoreTargetLowering::
+LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC)
+{
+ CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+ SDValue Chain = TheCall->getChain();
+ SDValue Callee = TheCall->getCallee();
+ bool isVarArg = TheCall->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+ // The ABI dictates there should be one stack slot available to the callee
+ // on function entry (for saving lr).
+ CCInfo.AllocateStack(4, 4);
+
+ CCInfo.AnalyzeCallOperands(TheCall, CC_XCore);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+ SmallVector<SDValue, 12> MemOpChains;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments start after the first 5 operands of ISD::CALL
+ SDValue Arg = TheCall->getArg(i);
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ // Arguments that can be passed in a register are kept in the
+ // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
+
+ int Offset = VA.getLocMemOffset();
+
+ MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other,
+ Chain, Arg,
+ DAG.getConstant(Offset/4, MVT::i32)));
+ }
+ }
+
+ // Combine all the store nodes into a single TokenFactor, since the
+ // stores are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
+
+ // XCoreBranchLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, getPointerTy(), true),
+ DAG.getConstant(0, getPointerTy(), true),
+ InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
+ Op.getResNo());
+}
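One detail worth spelling out: because the slot reserved above for the callee's link register save sits at byte offset 0, the first stack argument lands at byte offset 4 and is stored with a word offset of 1. A tiny sketch of that mapping (hypothetical helper name):

  // STWSP takes a word offset, so the byte offset assigned by CC_XCore is
  // divided by 4, exactly as in the DAG.getConstant(Offset/4, ...) above.
  static unsigned stwspWordOffset(int ByteOffset) {
    return (unsigned)ByteOffset / 4;   // e.g. 4 -> 1, 8 -> 2
  }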
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. Returns a SDNode with the same number of values as the
+/// ISD::CALL.
+SDNode *XCoreTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
+ unsigned CallingConv, SelectionDAG &DAG) {
+ bool isVarArg = TheCall->isVarArg();
+ DebugLoc dl = TheCall->getDebugLoc();
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
+
+ CCInfo.AnalyzeCallResult(TheCall, RetCC_XCore);
+ SmallVector<SDValue, 8> ResultVals;
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ ResultVals.push_back(Chain.getValue(0));
+ }
+
+ ResultVals.push_back(Chain);
+
+ // Merge everything together with a MERGE_VALUES node.
+ return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+ &ResultVals[0], ResultVals.size()).getNode();
+}
+
+//===----------------------------------------------------------------------===//
+// FORMAL_ARGUMENTS Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// XCore custom FORMAL_ARGUMENTS implementation
+SDValue XCoreTargetLowering::
+LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG)
+{
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ switch(CC)
+ {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return LowerCCCArguments(Op, DAG);
+ }
+}
+
+/// LowerCCCArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.
+/// TODO: sret
+SDValue XCoreTargetLowering::
+LowerCCCArguments(SDValue Op, SelectionDAG &DAG)
+{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SDValue Root = Op.getOperand(0);
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+ unsigned CC = MF.getFunction()->getCallingConv();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+ CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_XCore);
+
+ unsigned StackSlotSize = XCoreFrameInfo::stackSlotSize();
+
+ SmallVector<SDValue, 16> ArgValues;
+
+ unsigned LRSaveSize = StackSlotSize;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+
+ CCValAssign &VA = ArgLocs[i];
+
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ MVT RegVT = VA.getLocVT();
+ switch (RegVT.getSimpleVT()) {
+ default:
+ cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+ << RegVT.getSimpleVT()
+ << "\n";
+ abort();
+ case MVT::i32:
+ unsigned VReg = RegInfo.createVirtualRegister(
+ XCore::GRRegsRegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ ArgValues.push_back(DAG.getCopyFromReg(Root, dl, VReg, RegVT));
+ }
+ } else {
+ // sanity check
+ assert(VA.isMemLoc());
+ // Load the argument to a virtual register
+ unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
+ if (ObjSize > StackSlotSize) {
+ cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+ << VA.getLocVT().getSimpleVT()
+ << "\n";
+ }
+ // Create the frame index object for this incoming parameter...
+ int FI = MFI->CreateFixedObject(ObjSize,
+ LRSaveSize + VA.getLocMemOffset());
+
+ // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ ArgValues.push_back(DAG.getLoad(VA.getLocVT(), dl, Root, FIN, NULL, 0));
+ }
+ }
+
+ if (isVarArg) {
+ /* Argument registers */
+ static const unsigned ArgRegs[] = {
+ XCore::R0, XCore::R1, XCore::R2, XCore::R3
+ };
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs,
+ array_lengthof(ArgRegs));
+ if (FirstVAReg < array_lengthof(ArgRegs)) {
+ SmallVector<SDValue, 4> MemOps;
+ int offset = 0;
+ // Save remaining registers, storing higher register numbers at a higher
+ // address
+ for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) {
+ // Create a stack slot
+ int FI = MFI->CreateFixedObject(4, offset);
+ if (i == FirstVAReg) {
+ XFI->setVarArgsFrameIndex(FI);
+ }
+ offset -= StackSlotSize;
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ // Move argument from phys reg -> virt reg
+ unsigned VReg = RegInfo.createVirtualRegister(
+ XCore::GRRegsRegisterClass);
+ RegInfo.addLiveIn(ArgRegs[i], VReg);
+ SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+ // Move argument from virt reg -> stack
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else {
+ // This will point to the next argument passed via stack.
+ XFI->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset()));
+ }
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ std::vector<MVT> RetVT(Op.getNode()->value_begin(),
+ Op.getNode()->value_end());
+ return DAG.getNode(ISD::MERGE_VALUES, dl, RetVT,
+ &ArgValues[0], ArgValues.size());
+}
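The vararg spill loop above is easier to see with concrete numbers; a small sketch of the offsets it creates, assuming one named i32 argument so FirstVAReg is 1 (names here are illustrative only):

  #include <cstdio>

  int main() {
    const unsigned FirstVAReg = 1, NumArgRegs = 4, SlotSize = 4;
    int Offset = 0;
    // Walk R3 down to R1, like the loop above: R3 is stored at offset 0,
    // R2 at -4, R1 at -8, so higher-numbered registers end up at higher
    // addresses and VarArgsFrameIndex names R1's slot.
    for (unsigned i = NumArgRegs; i-- > FirstVAReg; ) {
      std::printf("R%u spilled at fixed offset %d%s\n", i, Offset,
                  i == FirstVAReg ? " (VarArgsFrameIndex)" : "");
      Offset -= (int)SlotSize;
    }
    return 0;
  }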
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue XCoreTargetLowering::
+LowerRET(SDValue Op, SelectionDAG &DAG)
+{
+ // CCValAssign - represents the assignment of
+ // the return value to a location.
+ SmallVector<CCValAssign, 16> RVLocs;
+ unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ DebugLoc dl = Op.getDebugLoc();
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
+
+ // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Op.getNode(), RetCC_XCore);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ // The chain is always operand #0
+ SDValue Chain = Op.getOperand(0);
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // The operands of ISD::RET are the chain followed by (value, flag) pairs,
+ // so operand i*2+1 is the i'th value to copy into its return register.
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Op.getOperand(i*2+1), Flag);
+
+ // Guarantee that all emitted copies are glued together
+ // so that they cannot be scheduled apart.
+ Flag = Chain.getValue(1);
+ }
+
+ // Return on XCore is always a "retsp 0"
+ if (Flag.getNode())
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ Chain, DAG.getConstant(0, MVT::i32), Flag);
+ else // Return Void
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
+ Chain, DAG.getConstant(0, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ assert((MI->getOpcode() == XCore::SELECT_CC) &&
+ "Unexpected instr type to insert");
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+ // to set, the condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ sinkMBB->transferSuccessors(BB);
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, dl, TII.get(XCore::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+}
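The diamond built here computes nothing more exotic than a conditional move; as plain code the expansion is equivalent to the following sketch (operand numbering follows the pseudo instruction above):

  static int selectCC(bool Cond /*op 1*/, int TrueVal /*op 2*/,
                      int FalseVal /*op 3*/) {
    int Result;          // op 0, defined by the PHI in sinkMBB
    if (Cond)            // thisMBB: BRFT to sinkMBB when Cond is true
      Result = TrueVal;
    else                 // copy0MBB: the fall-through path
      Result = FalseVal;
    return Result;       // sinkMBB
  }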
+
+//===----------------------------------------------------------------------===//
+// Addressing mode description hooks
+//===----------------------------------------------------------------------===//
+
+static inline bool isImmUs(int64_t val)
+{
+ return (val >= 0 && val <= 11);
+}
+
+static inline bool isImmUs2(int64_t val)
+{
+ return (val%2 == 0 && isImmUs(val/2));
+}
+
+static inline bool isImmUs4(int64_t val)
+{
+ return (val%4 == 0 && isImmUs(val/4));
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool
+XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const {
+ MVT VT = getValueType(Ty, true);
+ // Get expected value type after legalization
+ switch (VT.getSimpleVT()) {
+ // Legal load / stores
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ // Expand i1 -> i8
+ case MVT::i1:
+ VT = MVT::i8;
+ break;
+ // Everything else is lowered to words
+ default:
+ VT = MVT::i32;
+ break;
+ }
+ if (AM.BaseGV) {
+ return VT == MVT::i32 && !AM.HasBaseReg && AM.Scale == 0 &&
+ AM.BaseOffs%4 == 0;
+ }
+
+ switch (VT.getSimpleVT()) {
+ default:
+ return false;
+ case MVT::i8:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs(AM.BaseOffs);
+ }
+ return AM.Scale == 1 && AM.BaseOffs == 0;
+ case MVT::i16:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs2(AM.BaseOffs);
+ }
+ return AM.Scale == 2 && AM.BaseOffs == 0;
+ case MVT::i32:
+ // reg + imm
+ if (AM.Scale == 0) {
+ return isImmUs4(AM.BaseOffs);
+ }
+ // reg + reg<<2
+ return AM.Scale == 4 && AM.BaseOffs == 0;
+ }
+
+ return false;
+}
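A compact restatement of the reg+imm case, assuming the encodable immediate range really is 0..11 units of the access size, which is what the isImmUs helpers above check:

  #include <cstdint>

  static bool isLegalRegImmOffset(int64_t Offset, unsigned AccessBytes) {
    if (Offset % AccessBytes != 0)
      return false;                     // must be a multiple of the access size
    int64_t Units = Offset / AccessBytes;
    return Units >= 0 && Units <= 11;   // scaled unsigned-small immediate
  }
  // e.g. isLegalRegImmOffset(44, 4) is true, isLegalRegImmOffset(48, 4) is not.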
+
+//===----------------------------------------------------------------------===//
+// XCore Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::vector<unsigned> XCoreTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const
+{
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) {
+ default : break;
+ case 'r':
+ return make_vector<unsigned>(XCore::R0, XCore::R1, XCore::R2,
+ XCore::R3, XCore::R4, XCore::R5,
+ XCore::R6, XCore::R7, XCore::R8,
+ XCore::R9, XCore::R10, XCore::R11, 0);
+ break;
+ }
+ return std::vector<unsigned>();
+}
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
new file mode 100644
index 000000000000..993ecbdc60a8
--- /dev/null
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -0,0 +1,123 @@
+//===-- XCoreISelLowering.h - XCore DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that XCore uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREISELLOWERING_H
+#define XCOREISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "XCore.h"
+
+namespace llvm {
+
+ // Forward declarations
+ class XCoreSubtarget;
+ class XCoreTargetMachine;
+
+ namespace XCoreISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END+XCore::INSTRUCTION_LIST_END,
+
+ // Branch and link (call)
+ BL,
+
+ // pc relative address
+ PCRelativeWrapper,
+
+ // dp relative address
+ DPRelativeWrapper,
+
+ // cp relative address
+ CPRelativeWrapper,
+
+ // Store word to stack
+ STWSP,
+
+ // Corresponds to retsp instruction
+ RETSP,
+
+ // Corresponds to LADD instruction
+ LADD,
+
+ // Corresponds to LSUB instruction
+ LSUB
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+ class XCoreTargetLowering : public TargetLowering
+ {
+ public:
+
+ explicit XCoreTargetLowering(XCoreTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG);
+
+ /// getTargetNodeName - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM,
+ const Type *Ty) const;
+
+ private:
+ const XCoreTargetMachine &TM;
+ const XCoreSubtarget &Subtarget;
+
+ // Lower Operand helpers
+ SDValue LowerCCCArguments(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC);
+ SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode*TheCall,
+ unsigned CallingConv, SelectionDAG &DAG);
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG);
+ SDValue getGlobalAddressWrapper(SDValue GA, GlobalValue *GV,
+ SelectionDAG &DAG);
+
+ // Lower Operand specifics
+ SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+
+ // Inline asm support
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const;
+
+ // Expand specifics
+ SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG);
+ };
+}
+
+#endif // XCOREISELLOWERING_H
diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td
new file mode 100644
index 000000000000..8002c993270c
--- /dev/null
+++ b/lib/Target/XCore/XCoreInstrFormats.td
@@ -0,0 +1,120 @@
+//===- XCoreInstrFormats.td - XCore Instruction Formats ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "XCore";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+// XCore pseudo instructions format
+class PseudoInstXCore<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern>;
+
+//===----------------------------------------------------------------------===//
+// Instruction formats
+//===----------------------------------------------------------------------===//
+
+class _F3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL3R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL2RUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLRU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLU6<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FLU10<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F2R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FRUS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _FL2R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F1R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _F0R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L4R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L5R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
+
+class _L6R<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstXCore<outs, ins, asmstr, pattern> {
+ let Inst{31-0} = 0;
+}
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
new file mode 100644
index 000000000000..504d2025edcf
--- /dev/null
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -0,0 +1,524 @@
+//===- XCoreInstrInfo.cpp - XCore Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMachineFunctionInfo.h"
+#include "XCoreInstrInfo.h"
+#include "XCore.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "XCoreGenInstrInfo.inc"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+namespace XCore {
+
+ // XCore Condition Codes
+ enum CondCode {
+ COND_TRUE,
+ COND_FALSE,
+ COND_INVALID
+ };
+}
+}
+
+using namespace llvm;
+
+XCoreInstrInfo::XCoreInstrInfo(void)
+ : TargetInstrInfoImpl(XCoreInsts, array_lengthof(XCoreInsts)),
+ RI(*this) {
+}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// Return true if the instruction is a register to register move and
+/// leave the source and dest operands in the passed parameters.
+///
+bool XCoreInstrInfo::isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSR, unsigned &DstSR) const {
+ SrcSR = DstSR = 0; // No sub-registers.
+
+ // We look for 4 kinds of patterns here:
+ // add dst, src, 0
+ // sub dst, src, 0
+ // or dst, src, src
+ // and dst, src, src
+ if ((MI.getOpcode() == XCore::ADD_2rus || MI.getOpcode() == XCore::SUB_2rus)
+ && isZeroImm(MI.getOperand(2))) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ } else if ((MI.getOpcode() == XCore::OR_3r || MI.getOpcode() == XCore::AND_3r)
+ && MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ return false;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned
+XCoreInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const{
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::LDWFI)
+ {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2))))
+ {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot stored to. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned
+XCoreInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::STWFI)
+ {
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2))))
+ {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// isInvariantLoad - Return true if the specified instruction (which is marked
+/// mayLoad) is loading from a location whose value is invariant across the
+/// function. For example, loading a value from the constant pool or from
+/// the argument area of a function, if it does not change. This should
+/// only return true if *all* loads the instruction does are invariant (if it
+/// does multiple loads).
+bool
+XCoreInstrInfo::isInvariantLoad(const MachineInstr *MI) const {
+ // Loads from constant pools and loads from invariant argument slots are
+ // invariant
+ int Opcode = MI->getOpcode();
+ if (Opcode == XCore::LDWCP_ru6 || Opcode == XCore::LDWCP_lru6) {
+ return MI->getOperand(1).isCPI();
+ }
+ int FrameIndex;
+ if (isLoadFromStackSlot(MI, FrameIndex)) {
+ const MachineFrameInfo &MFI =
+ *MI->getParent()->getParent()->getFrameInfo();
+ return MFI.isFixedObjectIndex(FrameIndex) &&
+ MFI.isImmutableObjectIndex(FrameIndex);
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+static inline bool IsBRU(unsigned BrOpc) {
+ return BrOpc == XCore::BRFU_u6
+ || BrOpc == XCore::BRFU_lu6
+ || BrOpc == XCore::BRBU_u6
+ || BrOpc == XCore::BRBU_lu6;
+}
+
+static inline bool IsBRT(unsigned BrOpc) {
+ return BrOpc == XCore::BRFT_ru6
+ || BrOpc == XCore::BRFT_lru6
+ || BrOpc == XCore::BRBT_ru6
+ || BrOpc == XCore::BRBT_lru6;
+}
+
+static inline bool IsBRF(unsigned BrOpc) {
+ return BrOpc == XCore::BRFF_ru6
+ || BrOpc == XCore::BRFF_lru6
+ || BrOpc == XCore::BRBF_ru6
+ || BrOpc == XCore::BRBF_lru6;
+}
+
+static inline bool IsCondBranch(unsigned BrOpc) {
+ return IsBRF(BrOpc) || IsBRT(BrOpc);
+}
+
+/// GetCondFromBranchOpc - Return the XCore CC that matches
+/// the corresponding branch instruction opcode.
+static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
+{
+ if (IsBRT(BrOpc)) {
+ return XCore::COND_TRUE;
+ } else if (IsBRF(BrOpc)) {
+ return XCore::COND_FALSE;
+ } else {
+ return XCore::COND_INVALID;
+ }
+}
+
+/// GetCondBranchFromCond - Return the Branch instruction
+/// opcode that matches the cc.
+static inline unsigned GetCondBranchFromCond(XCore::CondCode CC)
+{
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case XCore::COND_TRUE : return XCore::BRFT_lru6;
+ case XCore::COND_FALSE : return XCore::BRFF_lru6;
+ }
+}
+
+/// GetOppositeBranchCondition - Return the inverse of the specified
+/// condition, e.g. turning COND_TRUE into COND_FALSE.
+static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC)
+{
+ switch (CC) {
+ default: assert(0 && "Illegal condition code!");
+ case XCore::COND_TRUE : return XCore::COND_FALSE;
+ case XCore::COND_FALSE : return XCore::COND_TRUE;
+ }
+}
+
+/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+/// implemented for a target). Upon success, this returns false and returns
+/// with the following information in various cases:
+///
+/// 1. If this block ends with no branches (it just falls through to its succ)
+/// just return false, leaving TBB/FBB null.
+/// 2. If this block ends with only an unconditional branch, it sets TBB to be
+/// the destination block.
+/// 3. If this block ends with a conditional branch and it falls through to
+/// a successor block, it sets TBB to be the branch destination block and a
+/// list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+/// 4. If this block ends with a conditional branch followed by an unconditional
+/// branch, it returns the 'true' destination in TBB, the 'false' destination
+/// in FBB, and a list of operands that evaluate the condition. These
+/// operands can be passed to other TargetInstrInfo methods to create new
+/// branches.
+///
+/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// cases where this method returns success.
+///
+bool
+XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (IsBRU(LastInst->getOpcode())) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ XCore::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
+ if (BranchCode == XCore::COND_INVALID)
+ return true; // Can't handle indirect branch.
+
+ // Conditional branch
+ // Block ends with fall-through condbranch.
+
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ XCore::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
+
+ // If the block ends with conditional branch followed by unconditional,
+ // handle it.
+ if (BranchCode != XCore::COND_INVALID
+ && IsBRU(LastInst->getOpcode())) {
+
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ Cond.push_back(SecondLastInst->getOperand(0));
+
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (IsBRU(SecondLastInst->getOpcode()) &&
+ IsBRU(LastInst->getOpcode())) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
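+
+// For illustration only: a minimal sketch of how a client such as the branch
+// folding pass might drive this hook together with RemoveBranch and
+// InsertBranch (the surrounding control flow here is hypothetical):
+//
+//   MachineBasicBlock *TBB = 0, *FBB = 0;
+//   SmallVector<MachineOperand, 2> Cond;
+//   if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/true)) {
+//     TII->RemoveBranch(MBB);        // Strip the old terminators.
+//     if (TBB)                       // Re-emit, possibly with Cond reversed.
+//       TII->InsertBranch(MBB, TBB, FBB, Cond);
+//   }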
+
+unsigned
+XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ // FIXME there should probably be a DebugLoc argument here
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "Unexpected number of components!");
+
+ if (FBB == 0) { // One way branch.
+ if (Cond.empty()) {
+ // Unconditional branch
+ BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(TBB);
+ } else {
+ // Conditional branch.
+ unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+ }
+ return 1;
+ }
+
+ // Two-way Conditional branch.
+ assert(Cond.size() == 2 && "Unexpected number of components!");
+ unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
+ BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg())
+ .addMBB(TBB);
+ BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(FBB);
+ return 2;
+}
+
+unsigned
+XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!IsCondBranch(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ if (DestRC == SrcRC) {
+ if (DestRC == XCore::GRRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg)
+ .addReg(SrcReg)
+ .addImm(0);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ if (SrcRC == XCore::RRegsRegisterClass && SrcReg == XCore::SP &&
+ DestRC == XCore::GRRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg)
+ .addImm(0);
+ return true;
+ }
+ if (DestRC == XCore::RRegsRegisterClass && DestReg == XCore::SP &&
+ SrcRC == XCore::GRRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(XCore::SETSP_1r))
+ .addReg(SrcReg);
+ return true;
+ }
+ return false;
+}
+
+void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC) const
+{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(XCore::STWFI))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FrameIndex)
+ .addImm(0);
+}
+
+void XCoreInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill, SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+ assert(0 && "unimplemented\n");
+}
+
+void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const
+{
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg)
+ .addFrameIndex(FrameIndex)
+ .addImm(0);
+}
+
+void XCoreInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+ assert(0 && "unimplemented\n");
+}
+
+bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const
+{
+ if (CSI.empty()) {
+ return true;
+ }
+ MachineFunction *MF = MBB.getParent();
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>();
+
+ bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
+
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(it->getReg());
+
+ storeRegToStackSlot(MBB, MI, it->getReg(), true,
+ it->getFrameIdx(), it->getRegClass());
+ if (emitFrameMoves) {
+ unsigned SaveLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addImm(SaveLabelId);
+ XFI->getSpillLabels().push_back(
+ std::pair<unsigned, CalleeSavedInfo>(SaveLabelId, *it));
+ }
+ }
+ return true;
+}
+
+bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const
+{
+ bool AtStart = MI == MBB.begin();
+ MachineBasicBlock::iterator BeforeI = MI;
+ if (!AtStart)
+ --BeforeI;
+ for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
+ it != CSI.end(); ++it) {
+
+ loadRegFromStackSlot(MBB, MI, it->getReg(),
+ it->getFrameIdx(),
+ it->getRegClass());
+ assert(MI != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert multiple
+ // instructions.
+ if (AtStart)
+ MI = MBB.begin();
+ else {
+ MI = BeforeI;
+ ++MI;
+ }
+ }
+ return true;
+}
+
+/// BlockHasNoFallThrough - Return true if the MachineBasicBlock cannot
+/// fall through into its successor block.
+bool XCoreInstrInfo::
+BlockHasNoFallThrough(const MachineBasicBlock &MBB) const
+{
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case XCore::RETSP_u6: // Return.
+ case XCore::RETSP_lu6:
+ case XCore::BAU_1r: // Indirect branch.
+ case XCore::BRFU_u6: // Uncond branch.
+ case XCore::BRFU_lu6:
+ case XCore::BRBU_u6:
+ case XCore::BRBU_lu6:
+ return true;
+ default: return false;
+ }
+}
+
+/// ReverseBranchCondition - Invert the supplied branch condition in place,
+/// returning false to indicate success.
+bool XCoreInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+{
+ assert((Cond.size() == 2) &&
+ "Invalid XCore branch condition!");
+ Cond[0].setImm(GetOppositeBranchCondition((XCore::CondCode)Cond[0].getImm()));
+ return false;
+}
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
new file mode 100644
index 000000000000..08708863ad57
--- /dev/null
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -0,0 +1,110 @@
+//===- XCoreInstrInfo.h - XCore Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREINSTRUCTIONINFO_H
+#define XCOREINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "XCoreRegisterInfo.h"
+
+namespace llvm {
+
+class XCoreInstrInfo : public TargetInstrInfoImpl {
+ const XCoreRegisterInfo RI;
+public:
+ XCoreInstrInfo(void);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of TargetRegisterInfo. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the stored stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual bool isInvariantLoad(const MachineInstr *MI) const;
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+
+ virtual bool ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
new file mode 100644
index 000000000000..65cd4fe95559
--- /dev/null
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -0,0 +1,991 @@
+//===- XCoreInstrInfo.td - Target Description for XCore ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the XCore instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// Uses of CP, DP are not currently reflected in the patterns, since
+// having a physical register as an operand prevents loop hoisting and
+// since the value of these registers never changes during the life of the
+// function.
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass.
+//===----------------------------------------------------------------------===//
+
+include "XCoreInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Feature predicates.
+//===----------------------------------------------------------------------===//
+
+// HasXS1A - This predicate is true when the target processor supports XS1A
+// instructions.
+def HasXS1A : Predicate<"Subtarget.isXS1A()">;
+
+// HasXS1B - This predicate is true when the target processor supports XS1B
+// instructions.
+def HasXS1B : Predicate<"Subtarget.isXS1B()">;
+
+//===----------------------------------------------------------------------===//
+// XCore specific DAG Nodes.
+//
+
+// Call
+def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def SDT_XCoreAddress : SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+def pcrelwrapper : SDNode<"XCoreISD::PCRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def dprelwrapper : SDNode<"XCoreISD::DPRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def cprelwrapper : SDNode<"XCoreISD::CPRelativeWrapper", SDT_XCoreAddress,
+ []>;
+
+def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>;
+def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp,
+ [SDNPHasChain]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_XCoreCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_XCoreCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_XCoreCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+def div4_xform : SDNodeXForm<imm, [{
+ // Transformation function: imm/4
+ assert(N->getZExtValue() % 4 == 0);
+ return getI32Imm(N->getZExtValue()/4);
+}]>;
+
+def msksize_xform : SDNodeXForm<imm, [{
+ // Transformation function: get the size of a mask
+ assert(isMask_32(N->getZExtValue()));
+ // The mask size is 32 minus the number of leading zero bits.
+ return getI32Imm(32 - CountLeadingZeros_32(N->getZExtValue()));
+}]>;
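+
+// For example, a mask of 0x00ff has 24 leading zeros, so the transformation
+// above yields a size of 8; 0xffff yields 16.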
+
+def neg_xform : SDNodeXForm<imm, [{
+ // Transformation function: -imm
+ uint32_t value = N->getZExtValue();
+ return getI32Imm(-value);
+}]>;
+
+def div4neg_xform : SDNodeXForm<imm, [{
+ // Transformation function: -imm/4
+ uint32_t value = N->getZExtValue();
+ assert(-value % 4 == 0);
+ return getI32Imm(-value/4);
+}]>;
+
+def immUs4Neg : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (-value)%4 == 0 && (-value)/4 <= 11;
+}]>;
+
+def immUs4 : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return value%4 == 0 && value/4 <= 11;
+}]>;
+
+def immUsNeg : PatLeaf<(imm), [{
+ return -((uint32_t)N->getZExtValue()) <= 11;
+}]>;
+
+def immUs : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() <= 11;
+}]>;
+
+def immU6 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 6);
+}]>;
+
+def immU10 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 10);
+}]>;
+
+def immU16 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 16);
+}]>;
+
+def immU20 : PatLeaf<(imm), [{
+ return (uint32_t)N->getZExtValue() < (1 << 20);
+}]>;
+
+// FIXME check subtarget. Currently we check if the immediate
+// is in the common subset of legal immediate values for both
+// XS1A and XS1B.
+def immMskBitp : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ if (!isMask_32(value)) {
+ return false;
+ }
+ int msksize = 32 - CountLeadingZeros_32(value);
+ return (msksize >= 1 && msksize <= 8)
+ || msksize == 16
+ || msksize == 24
+ || msksize == 32;
+}]>;
+
+// FIXME check subtarget. Currently we check if the immediate
+// is in the common subset of legal immediate values for both
+// XS1A and XS1B.
+def immBitp : PatLeaf<(imm), [{
+ uint32_t value = (uint32_t)N->getZExtValue();
+ return (value >= 1 && value <= 8)
+ || value == 16
+ || value == 24
+ || value == 32;
+}]>;
+
+def lda16f : PatFrag<(ops node:$addr, node:$offset),
+ (add node:$addr, (shl node:$offset, 1))>;
+def lda16b : PatFrag<(ops node:$addr, node:$offset),
+ (sub node:$addr, (shl node:$offset, 1))>;
+def ldawf : PatFrag<(ops node:$addr, node:$offset),
+ (add node:$addr, (shl node:$offset, 2))>;
+def ldawb : PatFrag<(ops node:$addr, node:$offset),
+ (sub node:$addr, (shl node:$offset, 2))>;
+
+// Instruction operand types
+def calltarget : Operand<i32>;
+def brtarget : Operand<OtherVT>;
+def pclabel : Operand<i32>;
+
+// Addressing modes
+def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
+def ADDRdpii : ComplexPattern<i32, 2, "SelectADDRdpii", [add, dprelwrapper],
+ []>;
+def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper],
+ []>;
+
+// Address operands
+def MEMii : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+// Three operand short
+
+multiclass F3R_2RUS<string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
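+
+// For example, defm ADD : F3R_2RUS<"add", add> (defined below) expands to an
+// ADD_3r (three-register) form and an ADD_2rus (register + short unsigned
+// immediate) form sharing the "add" mnemonic.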
+
+multiclass F3R_2RUS_np<string OpcStr> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+}
+
+multiclass F3R_2RBITP<string OpcStr, SDNode OpNode> {
+ def _3r: _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _2rus : _F2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class F3R<string OpcStr, SDNode OpNode> : _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+class F3R_np<string OpcStr> : _F3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ []>;
+// Three operand long
+
+/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
+multiclass FL3R_L2RUS<string OpcStr, SDNode OpNode> {
+ def _l3r: _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immUs:$c))]>;
+}
+
+/// FL3R_L2RUS multiclass - Define a normal FL3R/FL2RUS pattern in one shot.
+multiclass FL3R_L2RBITP<string OpcStr, SDNode OpNode> {
+ def _l3r: _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+ def _l2rus : _FL2RUS<
+ (outs GRRegs:$dst), (ins GRRegs:$b, i32imm:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, immBitp:$c))]>;
+}
+
+class FL3R<string OpcStr, SDNode OpNode> : _FL3R<
+ (outs GRRegs:$dst), (ins GRRegs:$b, GRRegs:$c),
+ !strconcat(OpcStr, " $dst, $b, $c"),
+ [(set GRRegs:$dst, (OpNode GRRegs:$b, GRRegs:$c))]>;
+
+// Register - U6
+// Operand register - U6
+multiclass FRU6_LRU6_branch<string OpcStr> {
+ def _ru6: _FRU6<
+ (outs), (ins GRRegs:$cond, brtarget:$dest),
+ !strconcat(OpcStr, " $cond, $dest"),
+ []>;
+ def _lru6: _FLRU6<
+ (outs), (ins GRRegs:$cond, brtarget:$dest),
+ !strconcat(OpcStr, " $cond, $dest"),
+ []>;
+}
+
+multiclass FRU6_LRU6_cp<string OpcStr> {
+ def _ru6: _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$a),
+ !strconcat(OpcStr, " $dst, cp[$a]"),
+ []>;
+ def _lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$a),
+ !strconcat(OpcStr, " $dst, cp[$a]"),
+ []>;
+}
+
+// U6
+multiclass FU6_LU6<string OpcStr, SDNode OpNode> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(OpNode immU6:$b)]>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(OpNode immU16:$b)]>;
+}
+
+multiclass FU6_LU6_np<string OpcStr> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+}
+
+// U10
+multiclass FU10_LU10_np<string OpcStr> {
+ def _u10: _FU10<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+ def _lu10: _FLU10<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ []>;
+}
+
+// Two operand short
+
+class F2R_np<string OpcStr> : _F2R<
+ (outs GRRegs:$dst), (ins GRRegs:$b),
+ !strconcat(OpcStr, " $dst, $b"),
+ []>;
+
+// Two operand long
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : PseudoInstXCore<(outs), (ins i32imm:$amt),
+ "${:comment} ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : PseudoInstXCore<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "${:comment} ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def LDWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+ "${:comment} LDWFI $dst, $addr",
+ [(set GRRegs:$dst, (load ADDRspii:$addr))]>;
+
+def LDAWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr),
+ "${:comment} LDAWFI $dst, $addr",
+ [(set GRRegs:$dst, ADDRspii:$addr)]>;
+
+def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr),
+ "${:comment} STWFI $src, $addr",
+ [(store GRRegs:$src, ADDRspii:$addr)]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded by the
+// scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1 in {
+ def SELECT_CC : PseudoInstXCore<(outs GRRegs:$dst),
+ (ins GRRegs:$cond, GRRegs:$T, GRRegs:$F),
+ "${:comment} SELECT_CC PSEUDO!",
+ [(set GRRegs:$dst,
+ (select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Three operand short
+defm ADD : F3R_2RUS<"add", add>;
+defm SUB : F3R_2RUS<"sub", sub>;
+let neverHasSideEffects = 1 in {
+defm EQ : F3R_2RUS_np<"eq">;
+def LSS_3r : F3R_np<"lss">;
+def LSU_3r : F3R_np<"lsu">;
+}
+def AND_3r : F3R<"and", and>;
+def OR_3r : F3R<"or", or>;
+
+let mayLoad=1 in {
+def LDW_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldw $dst, $addr[$offset]",
+ []>;
+
+def LDW_2rus : _F2RUS<(outs GRRegs:$dst), (ins GRRegs:$addr, i32imm:$offset),
+ "ldw $dst, $addr[$offset]",
+ []>;
+
+def LD16S_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ld16s $dst, $addr[$offset]",
+ []>;
+
+def LD8U_3r : _F3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ld8u $dst, $addr[$offset]",
+ []>;
+}
+
+let mayStore=1 in {
+def STW_3r : _F3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "stw $val, $addr[$offset]",
+ []>;
+
+def STW_2rus : _F2RUS<(outs), (ins GRRegs:$val, GRRegs:$addr, i32imm:$offset),
+ "stw $val, $addr[$offset]",
+ []>;
+}
+
+defm SHL : F3R_2RBITP<"shl", shl>;
+defm SHR : F3R_2RBITP<"shr", srl>;
+// TODO tsetr
+
+// Three operand long
+def LDAWF_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[$offset]",
+ [(set GRRegs:$dst, (ldawf GRRegs:$addr, GRRegs:$offset))]>;
+
+let neverHasSideEffects = 1 in
+def LDAWF_l2rus : _FL2RUS<(outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[$offset]",
+ []>;
+
+def LDAWB_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ [(set GRRegs:$dst, (ldawb GRRegs:$addr, GRRegs:$offset))]>;
+
+let neverHasSideEffects = 1 in
+def LDAWB_l2rus : _FL2RUS<(outs GRRegs:$dst),
+ (ins GRRegs:$addr, i32imm:$offset),
+ "ldaw $dst, $addr[-$offset]",
+ []>;
+
+def LDA16F_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[$offset]",
+ [(set GRRegs:$dst, (lda16f GRRegs:$addr, GRRegs:$offset))]>;
+
+def LDA16B_l3r : _FL3R<(outs GRRegs:$dst), (ins GRRegs:$addr, GRRegs:$offset),
+ "lda16 $dst, $addr[-$offset]",
+ [(set GRRegs:$dst, (lda16b GRRegs:$addr, GRRegs:$offset))]>;
+
+def MUL_l3r : FL3R<"mul", mul>;
+// Instructions which may trap are marked as side effecting.
+let hasSideEffects = 1 in {
+def DIVS_l3r : FL3R<"divs", sdiv>;
+def DIVU_l3r : FL3R<"divu", udiv>;
+def REMS_l3r : FL3R<"rems", srem>;
+def REMU_l3r : FL3R<"remu", urem>;
+}
+def XOR_l3r : FL3R<"xor", xor>;
+defm ASHR : FL3R_L2RBITP<"ashr", sra>;
+// TODO crc32, crc8, inpw, outpw
+let mayStore=1 in {
+def ST16_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st16 $val, $addr[$offset]",
+ []>;
+
+def ST8_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset),
+ "st8 $val, $addr[$offset]",
+ []>;
+}
+
+// Four operand long
+let Predicates = [HasXS1B], Constraints = "$src1 = $dst1,$src2 = $dst2" in {
+def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "maccu $dst1, $dst2, $src3, $src4",
+ []>;
+
+def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "maccs $dst1, $dst2, $src3, $src4",
+ []>;
+}
+
+// Five operand long
+
+let Predicates = [HasXS1B] in {
+def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ladd $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+
+def LSUB_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "lsub $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+
+def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3),
+ "ldiv $dst1, $dst2, $src1, $src2, $src3",
+ []>;
+}
+
+// Six operand long
+
+def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "lmul $dst1, $dst2, $src1, $src2, $src3, $src4",
+ []>;
+
+let Predicates = [HasXS1A] in
+def MACC_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2),
+ (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3,
+ GRRegs:$src4),
+ "macc $dst1, $dst2, $src1, $src2, $src3, $src4",
+ []>;
+
+// Register - U6
+
+//let Uses = [DP] in ...
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def LDAWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
+ "ldaw $dst, dp[$a]",
+ []>;
+
+let isReMaterializable = 1 in
+def LDAWDP_lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins MEMii:$a),
+ "ldaw $dst, dp[$a]",
+ [(set GRRegs:$dst, ADDRdpii:$a)]>;
+
+let mayLoad=1 in
+def LDWDP_ru6: _FRU6<(outs GRRegs:$dst), (ins MEMii:$a),
+ "ldw $dst, dp[$a]",
+ []>;
+
+def LDWDP_lru6: _FLRU6<
+ (outs GRRegs:$dst), (ins MEMii:$a),
+ "ldw $dst, dp[$a]",
+ [(set GRRegs:$dst, (load ADDRdpii:$a))]>;
+
+let mayStore=1 in
+def STWDP_ru6 : _FRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
+ "stw $val, dp[$addr]",
+ []>;
+
+def STWDP_lru6 : _FLRU6<(outs), (ins GRRegs:$val, MEMii:$addr),
+ "stw $val, dp[$addr]",
+ [(store GRRegs:$val, ADDRdpii:$addr)]>;
+
+//let Uses = [CP] in ..
+let mayLoad = 1, isReMaterializable = 1 in
+defm LDWCP : FRU6_LRU6_cp<"ldw">;
+
+let Uses = [SP] in {
+let mayStore=1 in {
+def STWSP_ru6 : _FRU6<
+ (outs), (ins GRRegs:$val, i32imm:$index),
+ "stw $val, sp[$index]",
+ [(XCoreStwsp GRRegs:$val, immU6:$index)]>;
+
+def STWSP_lru6 : _FLRU6<
+ (outs), (ins GRRegs:$val, i32imm:$index),
+ "stw $val, sp[$index]",
+ [(XCoreStwsp GRRegs:$val, immU16:$index)]>;
+}
+
+let mayLoad=1 in {
+def LDWSP_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldw $dst, sp[$b]",
+ []>;
+
+def LDWSP_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldw $dst, sp[$b]",
+ []>;
+}
+
+let neverHasSideEffects = 1 in {
+def LDAWSP_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_ru6_RRegs : _FRU6<
+ (outs RRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+
+def LDAWSP_lru6_RRegs : _FLRU6<
+ (outs RRegs:$dst), (ins i32imm:$b),
+ "ldaw $dst, sp[$b]",
+ []>;
+}
+}
+
+let isReMaterializable = 1 in {
+def LDC_ru6 : _FRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldc $dst, $b",
+ [(set GRRegs:$dst, immU6:$b)]>;
+
+def LDC_lru6 : _FLRU6<
+ (outs GRRegs:$dst), (ins i32imm:$b),
+ "ldc $dst, $b",
+ [(set GRRegs:$dst, immU16:$b)]>;
+}
+
+// Operand register - U6
+// TODO setc
+let isBranch = 1, isTerminator = 1 in {
+defm BRFT: FRU6_LRU6_branch<"bt">;
+defm BRBT: FRU6_LRU6_branch<"bt">;
+defm BRFF: FRU6_LRU6_branch<"bf">;
+defm BRBF: FRU6_LRU6_branch<"bf">;
+}
+
+// U6
+let Defs = [SP], Uses = [SP] in {
+let neverHasSideEffects = 1 in
+defm EXTSP : FU6_LU6_np<"extsp">;
+let mayStore = 1 in
+defm ENTSP : FU6_LU6_np<"entsp">;
+
+let isReturn = 1, isTerminator = 1, mayLoad = 1 in {
+defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
+}
+}
+
+// TODO extdp, kentsp, krestsp, blat, setsr
+// clrsr, getsr, kalli
+let isBranch = 1, isTerminator = 1 in {
+def BRBU_u6 : _FU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRBU_lu6 : _FLU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRFU_u6 : _FU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+
+def BRFU_lu6 : _FLU6<
+ (outs),
+ (ins brtarget:$target),
+ "bu $target",
+ []>;
+}
+
+//let Uses = [CP] in ...
+let Predicates = [HasXS1B], Defs = [R11], neverHasSideEffects = 1,
+ isReMaterializable = 1 in
+def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a),
+ "ldaw r11, cp[$a]",
+ []>;
+
+let Predicates = [HasXS1B], Defs = [R11], isReMaterializable = 1 in
+def LDAWCP_lu6: _FLRU6<
+ (outs), (ins MEMii:$a),
+ "ldaw r11, cp[$a]",
+ [(set R11, ADDRcpii:$a)]>;
+
+// U10
+// TODO ldwcpl, blacp
+
+let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in
+def LDAP_u10 : _FU10<
+ (outs),
+ (ins i32imm:$addr),
+ "ldap r11, $addr",
+ []>;
+
+let Defs = [R11], isReMaterializable = 1 in
+def LDAP_lu10 : _FLU10<
+ (outs),
+ (ins i32imm:$addr),
+ "ldap r11, $addr",
+ [(set R11, (pcrelwrapper tglobaladdr:$addr))]>;
+
+let isCall=1,
+// All calls clobber the link register and the non-callee-saved registers:
+Defs = [R0, R1, R2, R3, R11, LR] in {
+def BL_u10 : _FU10<
+ (outs),
+ (ins calltarget:$target, variable_ops),
+ "bl $target",
+ [(XCoreBranchLink immU10:$target)]>;
+
+def BL_lu10 : _FLU10<
+ (outs),
+ (ins calltarget:$target, variable_ops),
+ "bl $target",
+ [(XCoreBranchLink immU20:$target)]>;
+}
+
+// Two operand short
+// TODO getr, getst
+def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
+ "not $dst, $b",
+ [(set GRRegs:$dst, (not GRRegs:$b))]>;
+
+def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
+ "neg $dst, $b",
+ [(set GRRegs:$dst, (ineg GRRegs:$b))]>;
+
+// TODO setd, eet, eef, getts, setpt, outct, inct, chkct, outt, intt, out,
+// in, outshr, inshr, testct, testwct, tinitpc, tinitdp, tinitsp, tinitcp,
+// tsetmr, sext (reg), zext (reg)
+let isTwoAddress = 1 in {
+let neverHasSideEffects = 1 in
+def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "sext $dst, $src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
+ "zext $dst, $src2",
+ []>;
+
+def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2),
+ "andnot $dst, $src2",
+ [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>;
+}
+
+let isReMaterializable = 1, neverHasSideEffects = 1 in
+def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size),
+ "mkmsk $dst, $size",
+ []>;
+
+def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size),
+ "mkmsk $dst, $size",
+ [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>;
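+
+// The pattern above relies on the identity (1 << size) - 1 ==
+// (1 << size) + 0xffffffff in 32-bit arithmetic, which is exactly the mask
+// of $size low bits that mkmsk produces.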
+
+// Two operand long
+// TODO settw, setclk, setrdy, setpsc, endin, peek,
+// getd, testlcl, tinitlr, getps, setps
+def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "bitrev $dst, $src",
+ [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
+
+def BYTEREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "byterev $dst, $src",
+ [(set GRRegs:$dst, (bswap GRRegs:$src))]>;
+
+def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "clz $dst, $src",
+ [(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
+
+// One operand short
+// TODO edu, eeu, waitet, waitef, freer, tstart, msync, mjoin, syncr, clrtp
+// bru, setdp, setcp, setv, setev, kcall
+// dgetreg
+let isBranch=1, isIndirectBranch=1, isTerminator=1 in
+def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
+ "bau $addr",
+ [(brind GRRegs:$addr)]>;
+
+let Defs=[SP], neverHasSideEffects=1 in
+def SETSP_1r : _F1R<(outs), (ins GRRegs:$src),
+ "set sp, $src",
+ []>;
+
+let isBarrier = 1, hasCtrlDep = 1 in
+def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src),
+ "ecallt $src",
+ []>;
+
+let isBarrier = 1, hasCtrlDep = 1 in
+def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src),
+ "ecallf $src",
+ []>;
+
+let isCall=1,
+// All calls clobber the link register and the non-callee-saved registers:
+Defs = [R0, R1, R2, R3, R11, LR] in {
+def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops),
+ "bla $addr",
+ [(XCoreBranchLink GRRegs:$addr)]>;
+}
+
+// Zero operand short
+// TODO waiteu, clre, ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
+// stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret,
+// dentsp, drestsp
+
+let Defs = [R11] in
+def GETID_0R : _F0R<(outs), (ins),
+ "get r11, id",
+ [(set R11, (int_xcore_getid))]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat<(XCoreBranchLink tglobaladdr:$addr), (BL_lu10 tglobaladdr:$addr)>;
+def : Pat<(XCoreBranchLink texternalsym:$addr), (BL_lu10 texternalsym:$addr)>;
+
+/// sext_inreg
+def : Pat<(sext_inreg GRRegs:$b, i1), (SEXT_rus GRRegs:$b, 1)>;
+def : Pat<(sext_inreg GRRegs:$b, i8), (SEXT_rus GRRegs:$b, 8)>;
+def : Pat<(sext_inreg GRRegs:$b, i16), (SEXT_rus GRRegs:$b, 16)>;
+
+/// loads
+def : Pat<(zextloadi8 (add GRRegs:$addr, GRRegs:$offset)),
+ (LD8U_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(zextloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(sextloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (LD16S_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(sextloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(load (ldawf GRRegs:$addr, GRRegs:$offset)),
+ (LDW_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(load (add GRRegs:$addr, immUs4:$offset)),
+ (LDW_2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+def : Pat<(load GRRegs:$addr), (LDW_2rus GRRegs:$addr, 0)>;
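+
+// Worked example: (load (add $addr, 20)) satisfies immUs4 (20 is a multiple
+// of 4 and 20/4 <= 11), so it selects to "ldw $dst, $addr[5]" via div4_xform.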
+
+/// anyext
+def : Pat<(extloadi8 (add GRRegs:$addr, GRRegs:$offset)),
+ (LD8U_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(extloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>;
+def : Pat<(extloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (LD16S_3r GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(extloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>;
+
+/// stores
+def : Pat<(truncstorei8 GRRegs:$val, (add GRRegs:$addr, GRRegs:$offset)),
+ (ST8_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(truncstorei8 GRRegs:$val, GRRegs:$addr),
+ (ST8_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(truncstorei16 GRRegs:$val, (lda16f GRRegs:$addr, GRRegs:$offset)),
+ (ST16_l3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(truncstorei16 GRRegs:$val, GRRegs:$addr),
+ (ST16_l3r GRRegs:$val, GRRegs:$addr, (LDC_ru6 0))>;
+
+def : Pat<(store GRRegs:$val, (ldawf GRRegs:$addr, GRRegs:$offset)),
+ (STW_3r GRRegs:$val, GRRegs:$addr, GRRegs:$offset)>;
+def : Pat<(store GRRegs:$val, (add GRRegs:$addr, immUs4:$offset)),
+ (STW_2rus GRRegs:$val, GRRegs:$addr, (div4_xform immUs4:$offset))>;
+def : Pat<(store GRRegs:$val, GRRegs:$addr),
+ (STW_2rus GRRegs:$val, GRRegs:$addr, 0)>;
+
+/// cttz
+def : Pat<(cttz GRRegs:$src), (CLZ_l2r (BITREV_l2r GRRegs:$src))>;
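+// This counts trailing zeros by bit-reversing the value and then counting
+// leading zeros with clz.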
+
+/// trap
+def : Pat<(trap), (ECALLF_1r (LDC_ru6 0))>;
+
+///
+/// branch patterns
+///
+
+// unconditional branch
+def : Pat<(br bb:$addr), (BRFU_lu6 bb:$addr)>;
+
+// direct match equal/notequal zero brcond
+def : Pat<(brcond (setne GRRegs:$lhs, 0), bb:$dst),
+ (BRFT_lru6 GRRegs:$lhs, bb:$dst)>;
+def : Pat<(brcond (seteq GRRegs:$lhs, 0), bb:$dst),
+ (BRFF_lru6 GRRegs:$lhs, bb:$dst)>;
+
+def : Pat<(brcond (setle GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSS_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>;
+def : Pat<(brcond (setule GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSU_3r GRRegs:$rhs, GRRegs:$lhs), bb:$dst)>;
+def : Pat<(brcond (setge GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSS_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setuge GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (LSU_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setne GRRegs:$lhs, GRRegs:$rhs), bb:$dst),
+ (BRFF_lru6 (EQ_3r GRRegs:$lhs, GRRegs:$rhs), bb:$dst)>;
+def : Pat<(brcond (setne GRRegs:$lhs, immUs:$rhs), bb:$dst),
+ (BRFF_lru6 (EQ_2rus GRRegs:$lhs, immUs:$rhs), bb:$dst)>;
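+
+// For example, (brcond (setge $a, $b), bb) becomes "bf (lss $a, $b), bb":
+// the branch is taken when the less-than test is false, i.e. when $a >= $b.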
+
+// generic brcond pattern
+def : Pat<(brcond GRRegs:$cond, bb:$addr), (BRFT_lru6 GRRegs:$cond, bb:$addr)>;
+
+
+///
+/// Select patterns
+///
+
+// direct match equal/notequal zero select
+def : Pat<(select (setne GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC GRRegs:$lhs, GRRegs:$T, GRRegs:$F)>;
+
+def : Pat<(select (seteq GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC GRRegs:$lhs, GRRegs:$F, GRRegs:$T)>;
+
+def : Pat<(select (setle GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSS_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setule GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSU_3r GRRegs:$rhs, GRRegs:$lhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSS_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setuge GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (LSU_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setne GRRegs:$lhs, GRRegs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (EQ_3r GRRegs:$lhs, GRRegs:$rhs), GRRegs:$F, GRRegs:$T)>;
+def : Pat<(select (setne GRRegs:$lhs, immUs:$rhs), GRRegs:$T, GRRegs:$F),
+ (SELECT_CC (EQ_2rus GRRegs:$lhs, immUs:$rhs), GRRegs:$F, GRRegs:$T)>;
+
+///
+/// setcc patterns, only matched when none of the above brcond
+/// patterns match
+///
+
+// setcc 2 register operands
+def : Pat<(setle GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSS_3r GRRegs:$rhs, GRRegs:$lhs), 0)>;
+def : Pat<(setule GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSU_3r GRRegs:$rhs, GRRegs:$lhs), 0)>;
+
+def : Pat<(setgt GRRegs:$lhs, GRRegs:$rhs),
+ (LSS_3r GRRegs:$rhs, GRRegs:$lhs)>;
+def : Pat<(setugt GRRegs:$lhs, GRRegs:$rhs),
+ (LSU_3r GRRegs:$rhs, GRRegs:$lhs)>;
+
+def : Pat<(setge GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSS_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+def : Pat<(setuge GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (LSU_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+
+def : Pat<(setlt GRRegs:$lhs, GRRegs:$rhs),
+ (LSS_3r GRRegs:$lhs, GRRegs:$rhs)>;
+def : Pat<(setult GRRegs:$lhs, GRRegs:$rhs),
+ (LSU_3r GRRegs:$lhs, GRRegs:$rhs)>;
+
+def : Pat<(setne GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_2rus (EQ_3r GRRegs:$lhs, GRRegs:$rhs), 0)>;
+
+def : Pat<(seteq GRRegs:$lhs, GRRegs:$rhs),
+ (EQ_3r GRRegs:$lhs, GRRegs:$rhs)>;
+
+// setcc reg/imm operands
+def : Pat<(seteq GRRegs:$lhs, immUs:$rhs),
+ (EQ_2rus GRRegs:$lhs, immUs:$rhs)>;
+def : Pat<(setne GRRegs:$lhs, immUs:$rhs),
+ (EQ_2rus (EQ_2rus GRRegs:$lhs, immUs:$rhs), 0)>;
+
+// misc
+def : Pat<(add GRRegs:$addr, immUs4:$offset),
+ (LDAWF_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+
+def : Pat<(sub GRRegs:$addr, immUs4:$offset),
+ (LDAWB_l2rus GRRegs:$addr, (div4_xform immUs4:$offset))>;
+
+def : Pat<(and GRRegs:$val, immMskBitp:$mask),
+ (ZEXT_rus GRRegs:$val, (msksize_xform immMskBitp:$mask))>;
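+
+// For example, (and $val, 0xff) is rewritten as "zext $dst, 8" and
+// (and $val, 0xffff) as "zext $dst, 16", using msksize_xform defined above.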
+
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+def : Pat<(add GRRegs:$src1, immUsNeg:$src2),
+ (SUB_2rus GRRegs:$src1, (neg_xform immUsNeg:$src2))>;
+
+def : Pat<(add GRRegs:$src1, immUs4Neg:$src2),
+ (LDAWB_l2rus GRRegs:$src1, (div4neg_xform immUs4Neg:$src2))>;
+
+///
+/// Some peepholes
+///
+
+def : Pat<(mul GRRegs:$src, 3),
+ (LDA16F_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, 5),
+ (LDAWF_l3r GRRegs:$src, GRRegs:$src)>;
+
+def : Pat<(mul GRRegs:$src, -3),
+ (LDAWB_l3r GRRegs:$src, GRRegs:$src)>;
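+
+// These peepholes exploit the address computations: lda16 forms src + 2*src
+// (i.e. 3*src), ldaw forms src + 4*src (5*src), and the backwards ldaw form
+// gives src - 4*src (-3*src).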
+
+// ashr X, 32 is equivalent to ashr X, 31 on the XCore.
+def : Pat<(sra GRRegs:$src, 31),
+ (ASHR_l2rus GRRegs:$src, 32)>;
+
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
new file mode 100644
index 000000000000..43adb0f917c9
--- /dev/null
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -0,0 +1,69 @@
+//===- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares XCore-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMACHINEFUNCTIONINFO_H
+#define XCOREMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include <vector>
+
+namespace llvm {
+
+// Forward declarations
+class Function;
+
+/// XCoreFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private XCore target-specific information for each MachineFunction.
+class XCoreFunctionInfo : public MachineFunctionInfo {
+private:
+ bool UsesLR;
+ int LRSpillSlot;
+ int FPSpillSlot;
+ int VarArgsFrameIndex;
+ std::vector<std::pair<unsigned, CalleeSavedInfo> > SpillLabels;
+
+public:
+ XCoreFunctionInfo() :
+ UsesLR(false),
+ LRSpillSlot(0),
+ FPSpillSlot(0),
+ VarArgsFrameIndex(0) {}
+
+ XCoreFunctionInfo(MachineFunction &MF) :
+ UsesLR(false),
+ LRSpillSlot(0),
+ FPSpillSlot(0),
+ VarArgsFrameIndex(0) {}
+
+ ~XCoreFunctionInfo() {}
+
+ void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+
+ void setUsesLR(bool val) { UsesLR = val; }
+ bool getUsesLR() const { return UsesLR; }
+
+ void setLRSpillSlot(int off) { LRSpillSlot = off; }
+ int getLRSpillSlot() const { return LRSpillSlot; }
+
+ void setFPSpillSlot(int off) { FPSpillSlot = off; }
+ int getFPSpillSlot() const { return FPSpillSlot; }
+
+ std::vector<std::pair<unsigned, CalleeSavedInfo> >&getSpillLabels() {
+ return SpillLabels;
+ }
+};
+} // End llvm namespace
+
+#endif // XCOREMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
new file mode 100644
index 000000000000..82cd92d5685c
--- /dev/null
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -0,0 +1,598 @@
+//===- XCoreRegisterInfo.cpp - XCore Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreRegisterInfo.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "XCore.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii)
+ : XCoreGenRegisterInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
+ TII(tii) {
+}
+
+// helper functions
+static inline bool isImmUs(unsigned val) {
+ return val <= 11;
+}
+
+static inline bool isImmU6(unsigned val) {
+ return val < (1 << 6);
+}
+
+static inline bool isImmU16(unsigned val) {
+ return val < (1 << 16);
+}
+
+static const unsigned XCore_ArgRegs[] = {
+ XCore::R0, XCore::R1, XCore::R2, XCore::R3
+};
+
+const unsigned * XCoreRegisterInfo::getArgRegs(const MachineFunction *MF)
+{
+ return XCore_ArgRegs;
+}
+
+unsigned XCoreRegisterInfo::getNumArgRegs(const MachineFunction *MF)
+{
+ return array_lengthof(XCore_ArgRegs);
+}
+
+bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF)
+{
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ return (MMI && MMI->hasDebugInfo()) ||
+ !MF.getFunction()->doesNotThrow() ||
+ UnwindTablesMandatory;
+}
+
+const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+ const {
+ static const unsigned CalleeSavedRegs[] = {
+ XCore::R4, XCore::R5, XCore::R6, XCore::R7,
+ XCore::R8, XCore::R9, XCore::R10, XCore::LR,
+ 0
+ };
+ return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+XCoreRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+ XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
+ XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
+ XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
+ XCore::GRRegsRegisterClass, XCore::RRegsRegisterClass,
+ 0
+ };
+ return CalleeSavedRegClasses;
+}
+
+BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(XCore::CP);
+ Reserved.set(XCore::DP);
+ Reserved.set(XCore::SP);
+ Reserved.set(XCore::LR);
+ if (hasFP(MF)) {
+ Reserved.set(XCore::R10);
+ }
+ return Reserved;
+}
+
+bool
+XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+ // TODO can we estimate stack size?
+ return hasFP(MF);
+}
+
+bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const {
+ return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void XCoreRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ if (!hasReservedCallFrame(MF)) {
+ // Turn the adjcallstackdown instruction into 'extsp <amt>' and the
+ // adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
+ MachineInstr *Old = I;
+ uint64_t Amount = Old->getOperand(0).getImm();
+ if (Amount != 0) {
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ Amount = (Amount+Align-1)/Align*Align;
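+ // For example, with Align == 4 an outgoing-argument area of 10 bytes is
+ // rounded up to 12 bytes, which the code below encodes as 3 words.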
+
+ assert(Amount%4 == 0);
+ Amount /= 4;
+
+ bool isU6 = isImmU6(Amount);
+
+ if (!isU6 && !isImmU16(Amount)) {
+ // FIX could emit multiple instructions in this case.
+ cerr << "eliminateCallFramePseudoInstr size too big: "
+ << Amount << "\n";
+ abort();
+ }
+
+ MachineInstr *New;
+ if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) {
+ int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode))
+ .addImm(Amount);
+ } else {
+ assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
+ int Opcode = isU6 ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
+ .addImm(Amount);
+ }
+
+ // Replace the pseudo instruction with a new instruction...
+ MBB.insert(I, New);
+ }
+ }
+
+ MBB.erase(I);
+}
+
+void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+ MachineInstr &MI = *II;
+ DebugLoc dl = MI.getDebugLoc();
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ MachineOperand &FrameOp = MI.getOperand(i);
+ int FrameIndex = FrameOp.getIndex();
+
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ int StackSize = MF.getFrameInfo()->getStackSize();
+
+ #ifndef NDEBUG
+ DOUT << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ DOUT << "<--------->\n";
+ MI.print(DOUT);
+ DOUT << "FrameIndex : " << FrameIndex << "\n";
+ DOUT << "FrameOffset : " << Offset << "\n";
+ DOUT << "StackSize : " << StackSize << "\n";
+ #endif
+
+ Offset += StackSize;
+
+ // fold constant into offset.
+ Offset += MI.getOperand(i + 1).getImm();
+ MI.getOperand(i + 1).ChangeToImmediate(0);
+
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+
+ #ifndef NDEBUG
+ DOUT << "Offset : " << Offset << "\n";
+ DOUT << "<--------->\n";
+ #endif
+
+ Offset/=4;
+
+ bool FP = hasFP(MF);
+
+ unsigned Reg = MI.getOperand(0).getReg();
+ bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill();
+
+ assert(XCore::GRRegsRegisterClass->contains(Reg) &&
+ "Unexpected register operand");
+
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ if (FP) {
+ bool isUs = isImmUs(Offset);
+ unsigned FramePtr = XCore::R10;
+
+ MachineInstr *New = 0;
+ if (!isUs) {
+ if (!RS) {
+ cerr << "eliminateFrameIndex Frame size too big: " << Offset << "\n";
+ abort();
+ }
+ unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II,
+ SPAdj);
+ loadConstant(MBB, II, ScratchReg, Offset, dl);
+ switch (MI.getOpcode()) {
+ case XCore::LDWFI:
+ New = BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg)
+ .addReg(FramePtr)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ case XCore::STWFI:
+ New = BuildMI(MBB, II, dl, TII.get(XCore::STW_3r))
+ .addReg(Reg, getKillRegState(isKill))
+ .addReg(FramePtr)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ case XCore::LDAWFI:
+ New = BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg)
+ .addReg(FramePtr)
+ .addReg(ScratchReg, RegState::Kill);
+ break;
+ default:
+ assert(0 && "Unexpected Opcode\n");
+ }
+ } else {
+ switch (MI.getOpcode()) {
+ case XCore::LDWFI:
+ New = BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg)
+ .addReg(FramePtr)
+ .addImm(Offset);
+ break;
+ case XCore::STWFI:
+ New = BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus))
+ .addReg(Reg, getKillRegState(isKill))
+ .addReg(FramePtr)
+ .addImm(Offset);
+ break;
+ case XCore::LDAWFI:
+ New = BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg)
+ .addReg(FramePtr)
+ .addImm(Offset);
+ break;
+ default:
+ assert(0 && "Unexpected Opcode\n");
+ }
+ }
+ } else {
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset)) {
+ // FIXME could make this work for LDWSP, LDAWSP.
+ cerr << "eliminateFrameIndex Frame size too big: " << Offset << "\n";
+ abort();
+ }
+
+ switch (MI.getOpcode()) {
+ int NewOpcode;
+ case XCore::LDWFI:
+ NewOpcode = (isU6) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+ .addImm(Offset);
+ break;
+ case XCore::STWFI:
+ NewOpcode = (isU6) ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode))
+ .addReg(Reg, getKillRegState(isKill))
+ .addImm(Offset);
+ break;
+ case XCore::LDAWFI:
+ NewOpcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
+ BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg)
+ .addImm(Offset);
+ break;
+ default:
+ assert(0 && "Unexpected Opcode\n");
+ }
+ }
+ // Erase old instruction.
+ MBB.erase(II);
+}
+
+void
+XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR);
+ const TargetRegisterClass *RC = XCore::GRRegsRegisterClass;
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ if (LRUsed) {
+ MF.getRegInfo().setPhysRegUnused(XCore::LR);
+
+ bool isVarArg = MF.getFunction()->isVarArg();
+ int FrameIdx;
+ if (! isVarArg) {
+ // A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0);
+ } else {
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ }
+ XFI->setUsesLR(FrameIdx);
+ XFI->setLRSpillSlot(FrameIdx);
+ }
+ if (requiresRegisterScavenging(MF)) {
+ // Reserve a slot close to SP or frame pointer.
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment()));
+ }
+ if (hasFP(MF)) {
+ // A callee save register is used to hold the FP.
+ // This needs saving / restoring in the prologue / epilogue.
+ XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment()));
+ }
+}
+
+void XCoreRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+
+}
+
+void XCoreRegisterInfo::
+loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DstReg, int64_t Value, DebugLoc dl) const {
+ // TODO use mkmsk if possible.
+ if (!isImmU16(Value)) {
+ // TODO use constant pool.
+ cerr << "loadConstant value too big " << Value << "\n";
+ abort();
+ }
+ int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value);
+}
+
+void XCoreRegisterInfo::
+storeToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, int Offset, DebugLoc dl) const {
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset)) {
+ cerr << "storeToStack offset too big " << Offset << "\n";
+ abort();
+ }
+ int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode))
+ .addReg(SrcReg)
+ .addImm(Offset);
+}
+
+void XCoreRegisterInfo::
+loadFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DstReg, int Offset, DebugLoc dl) const {
+ assert(Offset%4 == 0 && "Misaligned stack offset");
+ Offset/=4;
+ bool isU6 = isImmU6(Offset);
+ if (!isU6 && !isImmU16(Offset)) {
+ cerr << "loadFromStack offset too big " << Offset << "\n";
+ abort();
+ }
+ int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6;
+ BuildMI(MBB, I, dl, TII.get(Opcode), DstReg)
+ .addImm(Offset);
+}
+
+void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+ bool FP = hasFP(MF);
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ cerr << "emitPrologue Frame size too big: " << FrameSize << "\n";
+ abort();
+ }
+ bool emitFrameMoves = needsFrameMoves(MF);
+
+ // Do we need to allocate space on the stack?
+ if (FrameSize) {
+ bool saveLR = XFI->getUsesLR();
+ bool LRSavedOnEntry = false;
+ int Opcode;
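+ // If the LR spill slot is the fixed object at offset 0, a single entsp
+ // both extends the stack and saves LR there; otherwise extsp only
+ // allocates the frame and LR is spilled explicitly below.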
+ if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
+ Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
+ MBB.addLiveIn(XCore::LR);
+ saveLR = false;
+ LRSavedOnEntry = true;
+ } else {
+ Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
+ }
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+
+ if (emitFrameMoves) {
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+
+ // Show update of SP.
+ unsigned FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId);
+
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
+ Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+
+ if (LRSavedOnEntry) {
+ MachineLocation CSDst(MachineLocation::VirtualFP, 0);
+ MachineLocation CSSrc(XCore::LR);
+ Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ }
+ }
+ if (saveLR) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl);
+ MBB.addLiveIn(XCore::LR);
+
+ if (emitFrameMoves) {
+ unsigned SaveLRLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveLRLabelId);
+ MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
+ MachineLocation CSSrc(XCore::LR);
+ MMI->getFrameMoves().push_back(MachineMove(SaveLRLabelId,
+ CSDst, CSSrc));
+ }
+ }
+ }
+
+ if (FP) {
+ // Save R10 to the stack.
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl);
+ // R10 is live-in. It is killed at the spill.
+ MBB.addLiveIn(XCore::R10);
+ if (emitFrameMoves) {
+ unsigned SaveR10LabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveR10LabelId);
+ MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
+ MachineLocation CSSrc(XCore::R10);
+ MMI->getFrameMoves().push_back(MachineMove(SaveR10LabelId,
+ CSDst, CSSrc));
+ }
+ // Set the FP from the SP.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
+ .addImm(0);
+ if (emitFrameMoves) {
+ // Show FP is now valid.
+ unsigned FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId);
+ MachineLocation SPDst(FramePtr);
+ MachineLocation SPSrc(MachineLocation::VirtualFP);
+ MMI->getFrameMoves().push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+ }
+ }
+
+ if (emitFrameMoves) {
+ // Frame moves for callee saved.
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ std::vector<std::pair<unsigned, CalleeSavedInfo> >&SpillLabels =
+ XFI->getSpillLabels();
+ for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
+ unsigned SpillLabel = SpillLabels[I].first;
+ CalleeSavedInfo &CSI = SpillLabels[I].second;
+ int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
+ unsigned Reg = CSI.getReg();
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc));
+ }
+ }
+}
+
+void XCoreRegisterInfo::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ bool FP = hasFP(MF);
+
+ if (FP) {
+ // Restore the stack pointer.
+ unsigned FramePtr = XCore::R10;
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r))
+ .addReg(FramePtr);
+ }
+
+ // Work out frame sizes.
+ int FrameSize = MFI->getStackSize();
+
+ assert(FrameSize%4 == 0 && "Misaligned frame size");
+
+ FrameSize/=4;
+
+ bool isU6 = isImmU6(FrameSize);
+
+ if (!isU6 && !isImmU16(FrameSize)) {
+ // FIXME could emit multiple instructions.
+ cerr << "emitEpilogue Frame size too big: " << FrameSize << "\n";
+ abort();
+ }
+
+ if (FrameSize) {
+ XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
+
+ if (FP) {
+ // Restore R10
+ int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ FPSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl);
+ }
+ bool restoreLR = XFI->getUsesLR();
+ if (restoreLR && MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0) {
+ int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ LRSpillOffset += FrameSize*4;
+ loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl);
+ restoreLR = false;
+ }
+ if (restoreLR) {
+ // Fold the epilogue into the return instruction: retsp restores LR
+ // from its spill slot, deallocates the frame and returns.
+ assert(MBBI->getOpcode() == XCore::RETSP_u6
+ || MBBI->getOpcode() == XCore::RETSP_lu6);
+ int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
+ MBB.erase(MBBI);
+ } else {
+ int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs;
+ BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize);
+ }
+ }
+}
+
+int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+unsigned XCoreRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+ bool FP = hasFP(MF);
+
+ return FP ? XCore::R10 : XCore::SP;
+}
+
+unsigned XCoreRegisterInfo::getRARegister() const {
+ return XCore::LR;
+}
+
+void XCoreRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Initial state of the frame pointer is SP.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(XCore::SP, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+#include "XCoreGenRegisterInfo.inc"
+
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
new file mode 100644
index 000000000000..00b7caa96bc6
--- /dev/null
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -0,0 +1,94 @@
+//===- XCoreRegisterInfo.h - XCore Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the XCore implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREREGISTERINFO_H
+#define XCOREREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "XCoreGenRegisterInfo.h.inc"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+struct XCoreRegisterInfo : public XCoreGenRegisterInfo {
+private:
+ const TargetInstrInfo &TII;
+
+ void loadConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int64_t Value, DebugLoc dl) const;
+
+ void storeToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned SrcReg, int Offset, DebugLoc dl) const;
+
+ void loadFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, int Offset, DebugLoc dl) const;
+
+public:
+ XCoreRegisterInfo(const TargetInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(MachineFunction &MF) const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ //! Return the array of argument passing registers
+ /*!
+ \note The size of this array is returned by getNumArgRegs().
+ */
+ static const unsigned *getArgRegs(const MachineFunction *MF = 0);
+
+ //! Return the size of the argument passing register array
+ static unsigned getNumArgRegs(const MachineFunction *MF = 0);
+
+ //! Return whether to emit frame moves
+ static bool needsFrameMoves(const MachineFunction &MF);
+
+ //! Get DWARF debugging register number
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
new file mode 100644
index 000000000000..62daf5d4567b
--- /dev/null
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -0,0 +1,91 @@
+//===- XCoreRegisterInfo.td - XCore Register defs ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the XCore register file
+//===----------------------------------------------------------------------===//
+
+class XCoreReg<string n> : Register<n> {
+ field bits<4> Num;
+ let Namespace = "XCore";
+}
+
+// Registers are identified with 4-bit ID numbers.
+// Ri - 32-bit integer registers
+class Ri<bits<4> num, string n> : XCoreReg<n> {
+ let Num = num;
+}
+
+// CPU registers
+def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
+def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
+def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
+def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
+def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
+def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
+def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
+def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+def CP : Ri<12, "cp">, DwarfRegNum<[12]>;
+def DP : Ri<13, "dp">, DwarfRegNum<[13]>;
+def SP : Ri<14, "sp">, DwarfRegNum<[14]>;
+def LR : Ri<15, "lr">, DwarfRegNum<[15]>;
+
+// Register classes.
+//
+def GRRegs : RegisterClass<"XCore", [i32], 32,
+ // Return values and arguments
+ [R0, R1, R2, R3,
+ // Not preserved across procedure calls
+ R11,
+ // Callee save
+ R4, R5, R6, R7, R8, R9, R10]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ GRRegsClass::iterator
+ GRRegsClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ GRRegsClass::iterator
+ GRRegsClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->hasFP(MF))
+ return end()-1; // don't allocate R10
+ else
+ return end();
+ }
+ }];
+}
+
+def RRegs : RegisterClass<"XCore", [i32], 32,
+ // Reserved
+ [CP, DP, SP, LR]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ RRegsClass::iterator
+ RRegsClass::allocation_order_begin(const MachineFunction &MF) const {
+ return begin();
+ }
+ RRegsClass::iterator
+ RRegsClass::allocation_order_end(const MachineFunction &MF) const {
+ // No allocatable registers
+ return begin();
+ }
+ }];
+}
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
new file mode 100644
index 000000000000..dc53da4ddf0b
--- /dev/null
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -0,0 +1,28 @@
+//===- XCoreSubtarget.cpp - XCore Subtarget Information -----------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the XCore specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreSubtarget.h"
+#include "XCore.h"
+#include "XCoreGenSubtarget.inc"
+using namespace llvm;
+
+XCoreSubtarget::XCoreSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS)
+ : IsXS1A(false),
+ IsXS1B(false)
+{
+ std::string CPU = "xs1a-generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
new file mode 100644
index 000000000000..ff6475baa810
--- /dev/null
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -0,0 +1,46 @@
+//=====-- XCoreSubtarget.h - Define Subtarget for the XCore -----*- C++ -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORESUBTARGET_H
+#define XCORESUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+class XCoreSubtarget : public TargetSubtarget {
+ bool IsXS1A;
+ bool IsXS1B;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ XCoreSubtarget(const TargetMachine &TM, const Module &M,
+ const std::string &FS);
+
+ bool isXS1A() const { return IsXS1A; }
+ bool isXS1B() const { return IsXS1B; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/XCore/XCoreTargetAsmInfo.cpp b/lib/Target/XCore/XCoreTargetAsmInfo.cpp
new file mode 100644
index 000000000000..55137621deae
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetAsmInfo.cpp
@@ -0,0 +1,201 @@
+//===-- XCoreTargetAsmInfo.cpp - XCore asm properties -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the XCoreTargetAsmInfo properties.
+// The small section flag is used to encode CP relative and DP relative
+// placement: a section that is small and writable is DP relative, while a
+// section that is small and read-only is CP relative.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetAsmInfo.h"
+#include "XCoreTargetMachine.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/ADT/StringExtras.h"
+
+using namespace llvm;
+
+XCoreTargetAsmInfo::XCoreTargetAsmInfo(const XCoreTargetMachine &TM)
+ : ELFTargetAsmInfo(TM),
+ Subtarget(TM.getSubtargetImpl()) {
+ TextSection = getUnnamedSection("\t.text", SectionFlags::Code);
+ DataSection = getNamedSection("\t.dp.data", SectionFlags::Writeable |
+ SectionFlags::Small);
+ BSSSection_ = getNamedSection("\t.dp.bss", SectionFlags::Writeable |
+ SectionFlags::BSS | SectionFlags::Small);
+ if (Subtarget->isXS1A()) {
+ ReadOnlySection = getNamedSection("\t.dp.rodata", SectionFlags::None |
+ SectionFlags::Writeable |
+ SectionFlags::Small);
+ } else {
+ ReadOnlySection = getNamedSection("\t.cp.rodata", SectionFlags::None |
+ SectionFlags::Small);
+ }
+ Data16bitsDirective = "\t.short\t";
+ Data32bitsDirective = "\t.long\t";
+ Data64bitsDirective = 0;
+ ZeroDirective = "\t.space\t";
+ CommentString = "#";
+ ConstantPoolSection = "\t.section\t.cp.rodata,\"ac\",@progbits";
+ JumpTableDataSection = "\t.section\t.dp.data,\"awd\",@progbits";
+ PrivateGlobalPrefix = ".L";
+ AscizDirective = ".asciiz";
+ WeakDefDirective = "\t.weak\t";
+ WeakRefDirective = "\t.weak\t";
+ SetDirective = "\t.set\t";
+
+ // Debug
+ HasLEB128 = true;
+ AbsoluteDebugSectionOffsets = true;
+
+ DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",@progbits";
+ DwarfInfoSection = "\t.section\t.debug_info,\"\",@progbits";
+ DwarfLineSection = "\t.section\t.debug_line,\"\",@progbits";
+ DwarfFrameSection = "\t.section\t.debug_frame,\"\",@progbits";
+ DwarfPubNamesSection = "\t.section\t.debug_pubnames,\"\",@progbits";
+ DwarfPubTypesSection = "\t.section\t.debug_pubtypes,\"\",@progbits";
+ DwarfStrSection = "\t.section\t.debug_str,\"\",@progbits";
+ DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
+ DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
+ DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
+ DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+}
+
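+// For non-weak globals, route mergeable strings through
+// MergeableStringSection, place mergeable constants in the read-only
+// section and map thread-local data onto the ordinary data / BSS sections;
+// everything else falls back to the generic ELF section selection.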
+const Section*
+XCoreTargetAsmInfo::SelectSectionForGlobal(const GlobalValue *GV) const {
+ SectionKind::Kind Kind = SectionKindForGlobal(GV);
+
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ {
+ if (!GVar->isWeakForLinker()) {
+ switch (Kind) {
+ case SectionKind::RODataMergeStr:
+ return MergeableStringSection(GVar);
+ case SectionKind::RODataMergeConst:
+ return getReadOnlySection();
+ case SectionKind::ThreadData:
+ return DataSection;
+ case SectionKind::ThreadBSS:
+ return getBSSSection_();
+ default:
+ break;
+ }
+ }
+ }
+ return ELFTargetAsmInfo::SelectSectionForGlobal(GV);
+}
+
+const Section*
+XCoreTargetAsmInfo::SelectSectionForMachineConst(const Type *Ty) const {
+ return MergeableConstSection(Ty);
+}
+
+const Section*
+XCoreTargetAsmInfo::MergeableConstSection(const GlobalVariable *GV) const {
+ Constant *C = GV->getInitializer();
+ return MergeableConstSection(C->getType());
+}
+
+inline const Section*
+XCoreTargetAsmInfo::MergeableConstSection(const Type *Ty) const {
+ const TargetData *TD = TM.getTargetData();
+
+ unsigned Size = TD->getTypeAllocSize(Ty);
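+ // Constants of 4, 8 or 16 bytes go in a mergeable .cp.const<size> section;
+ // anything else falls back to the plain read-only section.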
+ if (Size == 4 || Size == 8 || Size == 16) {
+ std::string Name = ".cp.const" + utostr(Size);
+
+ return getNamedSection(Name.c_str(),
+ SectionFlags::setEntitySize(SectionFlags::Mergeable |
+ SectionFlags::Small,
+ Size));
+ }
+
+ return getReadOnlySection();
+}
+
+const Section* XCoreTargetAsmInfo::
+MergeableStringSection(const GlobalVariable *GV) const {
+ // FIXME insert in correct mergable section
+ return getReadOnlySection();
+}
+
+unsigned XCoreTargetAsmInfo::
+SectionFlagsForGlobal(const GlobalValue *GV,
+ const char* Name) const {
+ unsigned Flags = ELFTargetAsmInfo::SectionFlagsForGlobal(GV, Name);
+ // Mask out unsupported flags
+ Flags &= ~(SectionFlags::Small | SectionFlags::TLS);
+
+ // Set CP / DP relative flags
+ if (GV) {
+ SectionKind::Kind Kind = SectionKindForGlobal(GV);
+ switch (Kind) {
+ case SectionKind::ThreadData:
+ case SectionKind::ThreadBSS:
+ case SectionKind::Data:
+ case SectionKind::BSS:
+ case SectionKind::SmallData:
+ case SectionKind::SmallBSS:
+ Flags |= SectionFlags::Small;
+ break;
+ case SectionKind::ROData:
+ case SectionKind::RODataMergeStr:
+ case SectionKind::SmallROData:
+ if (Subtarget->isXS1A()) {
+ Flags |= SectionFlags::Writeable;
+ }
+ Flags |= SectionFlags::Small;
+ break;
+ case SectionKind::RODataMergeConst:
+ Flags |= SectionFlags::Small;
+ default:
+ break;
+ }
+ }
+
+ return Flags;
+}
+
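+// Print the flags portion of a .section directive: a small, writable
+// section is marked 'd' (DP relative) and a small, read-only section is
+// marked 'c' (CP relative).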
+std::string XCoreTargetAsmInfo::
+printSectionFlags(unsigned flags) const {
+ std::string Flags = ",\"";
+
+ if (!(flags & SectionFlags::Debug))
+ Flags += 'a';
+ if (flags & SectionFlags::Code)
+ Flags += 'x';
+ if (flags & SectionFlags::Writeable)
+ Flags += 'w';
+ if (flags & SectionFlags::Mergeable)
+ Flags += 'M';
+ if (flags & SectionFlags::Strings)
+ Flags += 'S';
+ if (flags & SectionFlags::TLS)
+ Flags += 'T';
+ if (flags & SectionFlags::Small) {
+ if (flags & SectionFlags::Writeable)
+ Flags += 'd'; // DP relative
+ else
+ Flags += 'c'; // CP relative
+ }
+
+ Flags += "\",";
+
+ Flags += '@';
+
+ if (flags & SectionFlags::BSS)
+ Flags += "nobits";
+ else
+ Flags += "progbits";
+
+ if (unsigned entitySize = SectionFlags::getEntitySize(flags))
+ Flags += "," + utostr(entitySize);
+
+ return Flags;
+}
diff --git a/lib/Target/XCore/XCoreTargetAsmInfo.h b/lib/Target/XCore/XCoreTargetAsmInfo.h
new file mode 100644
index 000000000000..79fd36aa23ce
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetAsmInfo.h
@@ -0,0 +1,45 @@
+//=====-- XCoreTargetAsmInfo.h - XCore asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the XCoreTargetAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETASMINFO_H
+#define XCORETARGETASMINFO_H
+
+#include "llvm/Target/ELFTargetAsmInfo.h"
+
+namespace llvm {
+
+ // Forward declarations.
+ class XCoreTargetMachine;
+ class XCoreSubtarget;
+
+ class XCoreTargetAsmInfo : public ELFTargetAsmInfo {
+ private:
+ const XCoreSubtarget *Subtarget;
+ public:
+ explicit XCoreTargetAsmInfo(const XCoreTargetMachine &TM);
+
+ virtual const Section* SelectSectionForGlobal(const GlobalValue *GV) const;
+ virtual std::string printSectionFlags(unsigned flags) const;
+ const Section* MergeableConstSection(const GlobalVariable *GV) const;
+ inline const Section* MergeableConstSection(const Type *Ty) const;
+ const Section* MergeableStringSection(const GlobalVariable *GV) const;
+ virtual const Section*
+ SelectSectionForMachineConst(const Type *Ty) const;
+ virtual unsigned
+ SectionFlagsForGlobal(const GlobalValue *GV = NULL,
+ const char* name = NULL) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
new file mode 100644
index 000000000000..5437c574a80f
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -0,0 +1,71 @@
+//===-- XCoreTargetMachine.cpp - Define TargetMachine for XCore -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreTargetAsmInfo.h"
+#include "XCoreTargetMachine.h"
+#include "XCore.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachineRegistry.h"
+using namespace llvm;
+
+/// XCoreTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int XCoreTargetMachineModule;
+int XCoreTargetMachineModule = 0;
+
+namespace {
+ // Register the target.
+ RegisterTarget<XCoreTargetMachine> X("xcore", "XCore");
+}
+
+const TargetAsmInfo *XCoreTargetMachine::createTargetAsmInfo() const {
+ return new XCoreTargetAsmInfo(*this);
+}
+
+/// XCoreTargetMachine ctor - Create an ILP32 architecture model
+///
+XCoreTargetMachine::XCoreTargetMachine(const Module &M, const std::string &FS)
+ : Subtarget(*this, M, FS),
+ DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
+ "i16:16:32-i32:32:32-i64:32:32"),
+ InstrInfo(),
+ FrameInfo(*this),
+ TLInfo(*this) {
+}
+
+unsigned XCoreTargetMachine::getModuleMatchQuality(const Module &M) {
+ std::string TT = M.getTargetTriple();
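+ // Match any module whose target triple starts with "xcore-".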
+ if (TT.size() >= 6 && std::string(TT.begin(), TT.begin()+6) == "xcore-")
+ return 20;
+
+ // Otherwise we don't match.
+ return 0;
+}
+
+bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ PM.add(createXCoreISelDag(*this));
+ return false;
+}
+
+bool XCoreTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose,
+ raw_ostream &Out) {
+ // Output assembly language.
+ PM.add(createXCoreCodePrinterPass(Out, *this, OptLevel, Verbose));
+ return false;
+}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
new file mode 100644
index 000000000000..2385aedc9079
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -0,0 +1,63 @@
+//===-- XCoreTargetMachine.h - Define TargetMachine for XCore ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCore specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCORETARGETMACHINE_H
+#define XCORETARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "XCoreFrameInfo.h"
+#include "XCoreSubtarget.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreISelLowering.h"
+
+namespace llvm {
+
+class Module;
+
+class XCoreTargetMachine : public LLVMTargetMachine {
+ XCoreSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ XCoreInstrInfo InstrInfo;
+ XCoreFrameInfo FrameInfo;
+ XCoreTargetLowering TLInfo;
+
+protected:
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+ XCoreTargetMachine(const Module &M, const std::string &FS);
+
+ virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const XCoreFrameInfo *getFrameInfo() const { return &FrameInfo; }
+ virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ virtual XCoreTargetLowering *getTargetLowering() const {
+ return const_cast<XCoreTargetLowering*>(&TLInfo);
+ }
+
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addAssemblyEmitter(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool Verbose, raw_ostream &Out);
+};
+
+} // end namespace llvm
+
+#endif